libbb/unicode_wcwidth.c

   1 /*
   2  * This is an implementation of wcwidth() and wcswidth() (defined in
   3  * IEEE Std 1003.1-2001) for Unicode.
   4  *
   5  * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
   6  * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
   7  *
   8  * In fixed-width output devices, Latin characters all occupy a single
   9  * "cell" position of equal width, whereas ideographic CJK characters
  10  * occupy two such cells. Interoperability between terminal-line
  11  * applications and (teletype-style) character terminals using the
  12  * UTF-8 encoding requires agreement on which character should advance
  13  * the cursor by how many cell positions. No established formal
  14  * standards exist at present on which Unicode character shall occupy
  15  * how many cell positions on character terminals. These routines are
  16  * a first attempt at defining such behavior based on simple rules
  17  * applied to data provided by the Unicode Consortium.
  18  *
  19  * For some graphical characters, the Unicode standard explicitly
  20  * defines a character-cell width via the definition of the East Asian
  21  * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
  22  * In all these cases, there is no ambiguity about which width a
  23  * terminal shall use. For characters in the East Asian Ambiguous (A)
  24  * class, the width choice depends purely on a preference of backward
  25  * compatibility with either historic CJK or Western practice.
  26  * Choosing single-width for these characters is easy to justify as
  27  * the appropriate long-term solution, as the CJK practice of
  28  * displaying these characters as double-width comes from historic
  29  * implementation simplicity (8-bit encoded characters were displayed
  30  * single-width and 16-bit ones double-width, even for Greek,
  31  * Cyrillic, etc.) and not any typographic considerations.
  32  *
  33  * Much less clear is the choice of width for the Not East Asian
  34  * (Neutral) class. Existing practice does not dictate a width for any
  35  * of these characters. It would nevertheless make sense
  36  * typographically to allocate two character cells to characters such
  37  * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
  38  * represented adequately with a single-width glyph. The following
  39  * routines at present merely assign a single-cell width to all
  40  * neutral characters, in the interest of simplicity. This is not
  41  * entirely satisfactory and should be reconsidered before
  42  * establishing a formal standard in this area. At the moment, the
  43  * decision which Not East Asian (Neutral) characters should be
  44  * represented by double-width glyphs cannot yet be answered by
  45  * applying a simple rule from the Unicode database content. Setting
  46  * up a proper standard for the behavior of UTF-8 character terminals
  47  * will require a careful analysis not only of each Unicode character,
  48  * but also of each presentation form, something the author of these
  49  * routines has so far avoided doing.
  50  *
  51  * http://www.unicode.org/unicode/reports/tr11/
  52  *
  53  * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
  54  *
  55  * Permission to use, copy, modify, and distribute this software
  56  * for any purpose and without fee is hereby granted. The author
  57  * disclaims all warranties with regard to this software.
  58  *
  59  * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
  60  */
  61
  62 /* Assigned Unicode character ranges:
  63  * Plane Range
  64  * 0       0000–FFFF   Basic Multilingual Plane
  65  * 1      10000–1FFFF  Supplementary Multilingual Plane
  66  * 2      20000–2FFFF  Supplementary Ideographic Plane
  67  * 3      30000-3FFFF  Tertiary Ideographic Plane (no chars assigned yet)
  68  * 4-13   40000–DFFFF  currently unassigned
  69  * 14     E0000–EFFFF  Supplementary Special-purpose Plane
  70  * 15     F0000–FFFFF  Supplementary Private Use Area-A
  71  * 16    100000–10FFFF Supplementary Private Use Area-B
  72  *
  73  * "Supplementary Special-purpose Plane currently contains non-graphical
  74  * characters in two blocks of 128 and 240 characters. The first block
  75  * is for language tag characters for use when language cannot be indicated
  76  * through other protocols (such as the xml:lang  attribute in XML).
  77  * The other block contains glyph variation selectors to indicate
  78  * an alternate glyph for a character that cannot be determined by context."
  79  *
  80  * In simpler terms: it is a tool to fix the "Han unification" mess
  81  * created by the Unicode committee, to select the Chinese/Japanese/Korean/Taiwan
  82  * version of a character. (They forgot that the whole purpose of Unicode
  83  * was to be able to write all chars in one charset without such tricks.)
  84  * Until East Asian users say it is actually necessary to support these
  85  * code points in console applications like busybox
  86  * (i.e. do these chars ever appear in filenames, hostnames, text files
  87  * and such?), we are treating these code points as invalid.
  88  *
  89  * Tertiary Ideographic Plane is also ignored for now,
  90  * until Unicode committee assigns something there.
  91  */
  92
  93 #if LAST_SUPPORTED_WCHAR >= 0x300
  94 struct interval {
  95         uint16_t first;
  96         uint16_t last;
  97 };
  98
  99 /* auxiliary function for binary search in interval table */
 100 static int in_interval_table(unsigned ucs, const struct interval *table, unsigned max)
 101 {
 102         unsigned min;
 103         unsigned mid;
 104
 105         if (ucs < table[0].first || ucs > table[max].last)
 106                 return 0;
 107
 108         min = 0;
 109         while (max >= min) {
 110                 mid = (min + max) / 2;
 111                 if (ucs > table[mid].last)
 112                         min = mid + 1;
 113                 else if (ucs < table[mid].first)
 114                         max = mid - 1;
 115                 else
 116                         return 1;
 117         }
 118         return 0;
 119 }
 120
 121 static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
 122 {
 123         unsigned min;
 124         unsigned mid;
 125         unsigned first, last;
 126
 127         first = table[0] >> 2;
 128         last = first + (table[0] & 3);
 129         if (ucs < first || ucs > last)
 130                 return 0;
 131
 132         min = 0;
 133         while (max >= min) {
 134                 mid = (min + max) / 2;
 135                 first = table[mid] >> 2;
 136                 last = first + (table[mid] & 3);
 137                 if (ucs > last)
 138                         min = mid + 1;
 139                 else if (ucs < first)
 140                         max = mid - 1;
 141                 else
 142                         return 1;
 143         }
 144         return 0;
 145 }
 146 #endif
 147
 148
 149 /* The following two functions define the column width of an ISO 10646
 150  * character as follows:
 151  *
 152  *    - The null character (U+0000) has a column width of 0.
 153  *
 154  *    - Other C0/C1 control characters and DEL will lead to a return
 155  *      value of -1.
 156  *
 157  *    - Non-spacing and enclosing combining characters (general
 158  *      category code Mn or Me in the Unicode database) have a
 159  *      column width of 0.
 160  *
 161  *    - SOFT HYPHEN (U+00AD) has a column width of 1.
 162  *
 163  *    - Other format characters (general category code Cf in the Unicode
 164  *      database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
 165  *
 166  *    - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
 167  *      have a column width of 0.
 168  *
 169  *    - Spacing characters in the East Asian Wide (W) or East Asian
 170  *      Full-width (F) category as defined in Unicode Technical
 171  *      Report #11 have a column width of 2.
 172  *
 173  *    - All remaining characters (including all printable
 174  *      ISO 8859-1 and WGL4 characters, Unicode control characters,
 175  *      etc.) have a column width of 1.
 176  *
 177  * This implementation assumes that wchar_t characters are encoded
 178  * in ISO 10646.
 179  */
 180 static int wcwidth(unsigned ucs)
 181 {
 182 #if LAST_SUPPORTED_WCHAR >= 0x300
 183         /* sorted list of non-overlapping intervals of non-spacing characters */
 184         /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
 185         static const struct interval combining[] = {
 186 #define BIG_(a,b) { a, b },
 187 #define PAIR(a,b)
 188                 /* PAIR if < 0x4000 and no more than 4 chars big */
 189                 BIG_(0x0300, 0x036F)
 190                 PAIR(0x0483, 0x0486)
 191                 PAIR(0x0488, 0x0489)
 192                 BIG_(0x0591, 0x05BD)
 193                 PAIR(0x05BF, 0x05BF)
 194                 PAIR(0x05C1, 0x05C2)
 195                 PAIR(0x05C4, 0x05C5)
 196                 PAIR(0x05C7, 0x05C7)
 197                 PAIR(0x0600, 0x0603)
 198                 BIG_(0x0610, 0x0615)
 199                 BIG_(0x064B, 0x065E)
 200                 PAIR(0x0670, 0x0670)
 201                 BIG_(0x06D6, 0x06E4)
 202                 PAIR(0x06E7, 0x06E8)
 203                 PAIR(0x06EA, 0x06ED)
 204                 PAIR(0x070F, 0x070F)
 205                 PAIR(0x0711, 0x0711)
 206                 BIG_(0x0730, 0x074A)
 207                 BIG_(0x07A6, 0x07B0)
 208                 BIG_(0x07EB, 0x07F3)
 209                 PAIR(0x0901, 0x0902)
 210                 PAIR(0x093C, 0x093C)
 211                 BIG_(0x0941, 0x0948)
 212                 PAIR(0x094D, 0x094D)
 213                 PAIR(0x0951, 0x0954)
 214                 PAIR(0x0962, 0x0963)
 215                 PAIR(0x0981, 0x0981)
 216                 PAIR(0x09BC, 0x09BC)
 217                 PAIR(0x09C1, 0x09C4)
 218                 PAIR(0x09CD, 0x09CD)
 219                 PAIR(0x09E2, 0x09E3)
 220                 PAIR(0x0A01, 0x0A02)
 221                 PAIR(0x0A3C, 0x0A3C)
 222                 PAIR(0x0A41, 0x0A42)
 223                 PAIR(0x0A47, 0x0A48)
 224                 PAIR(0x0A4B, 0x0A4D)
 225                 PAIR(0x0A70, 0x0A71)
 226                 PAIR(0x0A81, 0x0A82)
 227                 PAIR(0x0ABC, 0x0ABC)
 228                 BIG_(0x0AC1, 0x0AC5)
 229                 PAIR(0x0AC7, 0x0AC8)
 230                 PAIR(0x0ACD, 0x0ACD)
 231                 PAIR(0x0AE2, 0x0AE3)
 232                 PAIR(0x0B01, 0x0B01)
 233                 PAIR(0x0B3C, 0x0B3C)
 234                 PAIR(0x0B3F, 0x0B3F)
 235                 PAIR(0x0B41, 0x0B43)
 236                 PAIR(0x0B4D, 0x0B4D)
 237                 PAIR(0x0B56, 0x0B56)
 238                 PAIR(0x0B82, 0x0B82)
 239                 PAIR(0x0BC0, 0x0BC0)
 240                 PAIR(0x0BCD, 0x0BCD)
 241                 PAIR(0x0C3E, 0x0C40)
 242                 PAIR(0x0C46, 0x0C48)
 243                 PAIR(0x0C4A, 0x0C4D)
 244                 PAIR(0x0C55, 0x0C56)
 245                 PAIR(0x0CBC, 0x0CBC)
 246                 PAIR(0x0CBF, 0x0CBF)
 247                 PAIR(0x0CC6, 0x0CC6)
 248                 PAIR(0x0CCC, 0x0CCD)
 249                 PAIR(0x0CE2, 0x0CE3)
 250                 PAIR(0x0D41, 0x0D43)
 251                 PAIR(0x0D4D, 0x0D4D)
 252                 PAIR(0x0DCA, 0x0DCA)
 253                 PAIR(0x0DD2, 0x0DD4)
 254                 PAIR(0x0DD6, 0x0DD6)
 255                 PAIR(0x0E31, 0x0E31)
 256                 BIG_(0x0E34, 0x0E3A)
 257                 BIG_(0x0E47, 0x0E4E)
 258                 PAIR(0x0EB1, 0x0EB1)
 259                 BIG_(0x0EB4, 0x0EB9)
 260                 PAIR(0x0EBB, 0x0EBC)
 261                 BIG_(0x0EC8, 0x0ECD)
 262                 PAIR(0x0F18, 0x0F19)
 263                 PAIR(0x0F35, 0x0F35)
 264                 PAIR(0x0F37, 0x0F37)
 265                 PAIR(0x0F39, 0x0F39)
 266                 BIG_(0x0F71, 0x0F7E)
 267                 BIG_(0x0F80, 0x0F84)
 268                 PAIR(0x0F86, 0x0F87)
 269                 PAIR(0x0FC6, 0x0FC6)
 270                 BIG_(0x0F90, 0x0F97)
 271                 BIG_(0x0F99, 0x0FBC)
 272                 PAIR(0x102D, 0x1030)
 273                 PAIR(0x1032, 0x1032)
 274                 PAIR(0x1036, 0x1037)
 275                 PAIR(0x1039, 0x1039)
 276                 PAIR(0x1058, 0x1059)
 277                 BIG_(0x1160, 0x11FF)
 278                 PAIR(0x135F, 0x135F)
 279                 PAIR(0x1712, 0x1714)
 280                 PAIR(0x1732, 0x1734)
 281                 PAIR(0x1752, 0x1753)
 282                 PAIR(0x1772, 0x1773)
 283                 PAIR(0x17B4, 0x17B5)
 284                 BIG_(0x17B7, 0x17BD)
 285                 PAIR(0x17C6, 0x17C6)
 286                 BIG_(0x17C9, 0x17D3)
 287                 PAIR(0x17DD, 0x17DD)
 288                 PAIR(0x180B, 0x180D)
 289                 PAIR(0x18A9, 0x18A9)
 290                 PAIR(0x1920, 0x1922)
 291                 PAIR(0x1927, 0x1928)
 292                 PAIR(0x1932, 0x1932)
 293                 PAIR(0x1939, 0x193B)
 294                 PAIR(0x1A17, 0x1A18)
 295                 PAIR(0x1B00, 0x1B03)
 296                 PAIR(0x1B34, 0x1B34)
 297                 BIG_(0x1B36, 0x1B3A)
 298                 PAIR(0x1B3C, 0x1B3C)
 299                 PAIR(0x1B42, 0x1B42)
 300                 BIG_(0x1B6B, 0x1B73)
 301                 BIG_(0x1DC0, 0x1DCA)
 302                 PAIR(0x1DFE, 0x1DFF)
 303                 BIG_(0x200B, 0x200F)
 304                 BIG_(0x202A, 0x202E)
 305                 PAIR(0x2060, 0x2063)
 306                 BIG_(0x206A, 0x206F)
 307                 BIG_(0x20D0, 0x20EF)
 308                 BIG_(0x302A, 0x302F)
 309                 PAIR(0x3099, 0x309A)
 310                 /* Too big to be packed in PAIRs: */
 311                 { 0xA806, 0xA806 },
 312                 { 0xA80B, 0xA80B },
 313                 { 0xA825, 0xA826 },
 314                 { 0xFB1E, 0xFB1E },
 315                 { 0xFE00, 0xFE0F },
 316                 { 0xFE20, 0xFE23 },
 317                 { 0xFEFF, 0xFEFF },
 318                 { 0xFFF9, 0xFFFB }
 319 #undef BIG_
 320 #undef PAIR
 321         };
 322         static const uint16_t combining1[] = {
 323 #define BIG_(a,b)
 324 #define PAIR(a,b) (a << 2) | (b-a),
 325                 /* Exact copy-n-paste of the above: */
 326                 BIG_(0x0300, 0x036F)
 327                 PAIR(0x0483, 0x0486)
 328                 PAIR(0x0488, 0x0489)
 329                 BIG_(0x0591, 0x05BD)
 330                 PAIR(0x05BF, 0x05BF)
 331                 PAIR(0x05C1, 0x05C2)
 332                 PAIR(0x05C4, 0x05C5)
 333                 PAIR(0x05C7, 0x05C7)
 334                 PAIR(0x0600, 0x0603)
 335                 BIG_(0x0610, 0x0615)
 336                 BIG_(0x064B, 0x065E)
 337                 PAIR(0x0670, 0x0670)
 338                 BIG_(0x06D6, 0x06E4)
 339                 PAIR(0x06E7, 0x06E8)
 340                 PAIR(0x06EA, 0x06ED)
 341                 PAIR(0x070F, 0x070F)
 342                 PAIR(0x0711, 0x0711)
 343                 BIG_(0x0730, 0x074A)
 344                 BIG_(0x07A6, 0x07B0)
 345                 BIG_(0x07EB, 0x07F3)
 346                 PAIR(0x0901, 0x0902)
 347                 PAIR(0x093C, 0x093C)
 348                 BIG_(0x0941, 0x0948)
 349                 PAIR(0x094D, 0x094D)
 350                 PAIR(0x0951, 0x0954)
 351                 PAIR(0x0962, 0x0963)
 352                 PAIR(0x0981, 0x0981)
 353                 PAIR(0x09BC, 0x09BC)
 354                 PAIR(0x09C1, 0x09C4)
 355                 PAIR(0x09CD, 0x09CD)
 356                 PAIR(0x09E2, 0x09E3)
 357                 PAIR(0x0A01, 0x0A02)
 358                 PAIR(0x0A3C, 0x0A3C)
 359                 PAIR(0x0A41, 0x0A42)
 360                 PAIR(0x0A47, 0x0A48)
 361                 PAIR(0x0A4B, 0x0A4D)
 362                 PAIR(0x0A70, 0x0A71)
 363                 PAIR(0x0A81, 0x0A82)
 364                 PAIR(0x0ABC, 0x0ABC)
 365                 BIG_(0x0AC1, 0x0AC5)
 366                 PAIR(0x0AC7, 0x0AC8)
 367                 PAIR(0x0ACD, 0x0ACD)
 368                 PAIR(0x0AE2, 0x0AE3)
 369                 PAIR(0x0B01, 0x0B01)
 370                 PAIR(0x0B3C, 0x0B3C)
 371                 PAIR(0x0B3F, 0x0B3F)
 372                 PAIR(0x0B41, 0x0B43)
 373                 PAIR(0x0B4D, 0x0B4D)
 374                 PAIR(0x0B56, 0x0B56)
 375                 PAIR(0x0B82, 0x0B82)
 376                 PAIR(0x0BC0, 0x0BC0)
 377                 PAIR(0x0BCD, 0x0BCD)
 378                 PAIR(0x0C3E, 0x0C40)
 379                 PAIR(0x0C46, 0x0C48)
 380                 PAIR(0x0C4A, 0x0C4D)
 381                 PAIR(0x0C55, 0x0C56)
 382                 PAIR(0x0CBC, 0x0CBC)
 383                 PAIR(0x0CBF, 0x0CBF)
 384                 PAIR(0x0CC6, 0x0CC6)
 385                 PAIR(0x0CCC, 0x0CCD)
 386                 PAIR(0x0CE2, 0x0CE3)
 387                 PAIR(0x0D41, 0x0D43)
 388                 PAIR(0x0D4D, 0x0D4D)
 389                 PAIR(0x0DCA, 0x0DCA)
 390                 PAIR(0x0DD2, 0x0DD4)
 391                 PAIR(0x0DD6, 0x0DD6)
 392                 PAIR(0x0E31, 0x0E31)
 393                 BIG_(0x0E34, 0x0E3A)
 394                 BIG_(0x0E47, 0x0E4E)
 395                 PAIR(0x0EB1, 0x0EB1)
 396                 BIG_(0x0EB4, 0x0EB9)
 397                 PAIR(0x0EBB, 0x0EBC)
 398                 BIG_(0x0EC8, 0x0ECD)
 399                 PAIR(0x0F18, 0x0F19)
 400                 PAIR(0x0F35, 0x0F35)
 401                 PAIR(0x0F37, 0x0F37)
 402                 PAIR(0x0F39, 0x0F39)
 403                 BIG_(0x0F71, 0x0F7E)
 404                 BIG_(0x0F80, 0x0F84)
 405                 PAIR(0x0F86, 0x0F87)
 406                 PAIR(0x0FC6, 0x0FC6)
 407                 BIG_(0x0F90, 0x0F97)
 408                 BIG_(0x0F99, 0x0FBC)
 409                 PAIR(0x102D, 0x1030)
 410                 PAIR(0x1032, 0x1032)
 411                 PAIR(0x1036, 0x1037)
 412                 PAIR(0x1039, 0x1039)
 413                 PAIR(0x1058, 0x1059)
 414                 BIG_(0x1160, 0x11FF)
 415                 PAIR(0x135F, 0x135F)
 416                 PAIR(0x1712, 0x1714)
 417                 PAIR(0x1732, 0x1734)
 418                 PAIR(0x1752, 0x1753)
 419                 PAIR(0x1772, 0x1773)
 420                 PAIR(0x17B4, 0x17B5)
 421                 BIG_(0x17B7, 0x17BD)
 422                 PAIR(0x17C6, 0x17C6)
 423                 BIG_(0x17C9, 0x17D3)
 424                 PAIR(0x17DD, 0x17DD)
 425                 PAIR(0x180B, 0x180D)
 426                 PAIR(0x18A9, 0x18A9)
 427                 PAIR(0x1920, 0x1922)
 428                 PAIR(0x1927, 0x1928)
 429                 PAIR(0x1932, 0x1932)
 430                 PAIR(0x1939, 0x193B)
 431                 PAIR(0x1A17, 0x1A18)
 432                 PAIR(0x1B00, 0x1B03)
 433                 PAIR(0x1B34, 0x1B34)
 434                 BIG_(0x1B36, 0x1B3A)
 435                 PAIR(0x1B3C, 0x1B3C)
 436                 PAIR(0x1B42, 0x1B42)
 437                 BIG_(0x1B6B, 0x1B73)
 438                 BIG_(0x1DC0, 0x1DCA)
 439                 PAIR(0x1DFE, 0x1DFF)
 440                 BIG_(0x200B, 0x200F)
 441                 BIG_(0x202A, 0x202E)
 442                 PAIR(0x2060, 0x2063)
 443                 BIG_(0x206A, 0x206F)
 444                 BIG_(0x20D0, 0x20EF)
 445                 BIG_(0x302A, 0x302F)
 446                 PAIR(0x3099, 0x309A)
 447 #undef BIG_
 448 #undef PAIR
 449         };
 450         struct CHECK {
 451 #define BIG_(a,b) char big##a[b-a <= 3 ? -1 : 1];
 452 #define PAIR(a,b) char pair##a[b-a > 3 ? -1 : 1];
 453                 /* Copy-n-paste it here again to verify correctness */
 454 #undef BIG_
 455 #undef PAIR
 456         };
 457 #endif
 458
 459         if (ucs == 0)
 460                 return 0;
 461
 462         /* Test for 8-bit control characters (00-1f, 80-9f, 7f) */
 463         if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
 464                 return -1;
 465         /* Quick abort if it is an obviously invalid char */
 466         if (ucs > LAST_SUPPORTED_WCHAR)
 467                 return -1;
 468
 469         /* Optimization: no combining chars below 0x300 */
 470         if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
 471                 return 1;
 472
 473 #if LAST_SUPPORTED_WCHAR >= 0x300
 474         /* Binary search in table of non-spacing characters */
 475         if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
 476                 return 0;
 477         if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1))
 478                 return 0;
 479
 480         /* Optimization: all chars below 0x1100 are not double-width */
 481         if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
 482                 return 1;
 483
 484 # if LAST_SUPPORTED_WCHAR >= 0x1100
 485         /* Invalid code points: */
 486         /* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
 487         /* Private Use Area (e000..f8ff) */
 488         /* Noncharacters fdd0..fdef */
 489         if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
 490          || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
 491         ) {
 492                 return -1;
 493         }
 494         /* 0xfffe and 0xffff in every plane are invalid */
 495         if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
 496                 return -1;
 497         }
 498
 499 #  if LAST_SUPPORTED_WCHAR >= 0x10000
 500         if (ucs >= 0x10000) {
 501                 /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
 502                 static const struct interval combining0x10000[] = {
 503                         { 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
 504                         { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
 505                         { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
 506                         { 0xD242, 0xD244 }
 507                 };
 508                 /* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */
 509                 if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
 510                         return 0;
 511                 /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
 512                 if (LAST_SUPPORTED_WCHAR >= 0xE0001
 513                  && (  ucs == 0xE0001
 514                     || (ucs >= 0xE0020 && ucs <= 0xE007F)
 515                     || (ucs >= 0xE0100 && ucs <= 0xE01EF)
 516                     )
 517                 ) {
 518                         return 0;
 519                 }
 520         }
 521 #  endif
 522
 523         /* If we arrive here, ucs is not a combining or C0/C1 control character.
 524          * Check whether it's 1 char or 2-shar wide.
 525          */
 526         return 1 +
 527                 (  (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */
 528                 || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
 529                 || ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */
 530                 || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
 531 #  if LAST_SUPPORTED_WCHAR >= 0xac00
 532                 || (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
 533                 || (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
 534                 || (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */
 535                 || (ucs >= 0xfe30 && ucs <= 0xfe6f) /* CJK Compatibility Forms */
 536                 || (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */
 537                 || (ucs >= 0xffe0 && ucs <= 0xffe6)
 538                 || ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */
 539 #  endif
 540                 );
 541 # endif /* >= 0x1100 */
 542 #endif /* >= 0x300 */
 543 }