From: Rich Felker Date: Tue, 24 Apr 2012 08:23:55 +0000 (-0400) Subject: new wcwidth implementation (fast table-based) X-Git-Tag: v0.8.10~8 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=1b0ce9af6d2aa7b92edaf3e9c631cb635bae22bd;p=oweals%2Fmusl.git new wcwidth implementation (fast table-based) I tried to go with improving the old binary-search-based algorithm, but between growth in the number of ranges, bad performance, and lack of confidence in the binary search code's stability under changes in the table, I decided it was worth the extra 1.8k to have something clean and maintainable. Also note that, like the alpha and punct tables, there's definitely room to optimize the nonspacing/wide tables by overlapping subtables. This is not a high priority, but I've begun looking into how to do it, and I suspect the table sizes can be roughly halved. If that turns out to be true, the new, fast, table-based implementation will be roughly the same size as if I had just extended the old binary-search one. 
--- diff --git a/src/ctype/nonspacing.h b/src/ctype/nonspacing.h new file mode 100644 index 00000000..4c25ef51 --- /dev/null +++ b/src/ctype/nonspacing.h @@ -0,0 +1,62 @@ +16,16,16,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,16,32,16,16,16,33,34,35, +36,37,38,39,16,16,40,16,16,16,16,16,16,16,16,16,16,16,41,42,16,16,43,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,44,16,45,46,47,48,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,49,16,16,50,51,16,52,16,16, +16,16,16,16,16,16,53,16,16,16,16,16,54,55,16,16,16,16,56,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,57,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,58,59,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,248,3,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,254,255,255,255,255,191, +182,0,0,0,0,0,0,0,31,0,255,7,0,0,0,0,0,248,255,255,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,192,191,159,61,0,0,0,128,2,0,0,0,255,255,255,7,0,0,0,0,0,0,0,0,0,0,192,255, +1,0,0,0,0,0,0,248,15,0,0,0,192,251,239,62,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,240,255,255,127,7,0,0,0,0,0,0,20,254,33,254,0,12,0,0,0,2,0,0,0,0,0, +0,16,30,32,0,0,12,0,0,0,6,0,0,0,0,0,0,16,134,57,2,0,0,0,35,0,6,0,0,0,0,0,0,16, +190,33,0,0,12,0,0,0,2,0,0,0,0,0,0,144,30,32,64,0,12,0,0,0,4,0,0,0,0,0,0,0,1, +32,0,0,0,0,0,0,0,0,0,0,0,0,0,192,193,61,96,0,12,0,0,0,0,0,0,0,0,0,0,144,64,48, +0,0,12,0,0,0,0,0,0,0,0,0,0,0,30,32,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,4,92,0,0,0, +0,0,0,0,0,0,0,0,242,7,128,127,0,0,0,0,0,0,0,0,0,0,0,0,242,27,0,63,0,0,0,0,0,0, +0,0,0,3,0,0,160,2,0,0,0,0,0,0,254,127,223,224,255,254,255,255,255,31,64,0,0,0, +0,0,0,0,0,0,0,0,0,224,253,102,0,0,0,195,1,0,30,0,100,32,0,32,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,224,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +28,0,0,0,28,0,0,0,12,0,0,0,12,0,0,0,0,0,0,0,176,63,64,254,15,32,0,0,0,0,0,56, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,135,1,4, +14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,1,0,0,0,0,0,0,64, +127,229,31,248,159,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,208,23,4,0,0, +0,0,248,15,0,3,0,0,0,60,11,0,0,0,0,0,0,64,163,3,0,0,0,0,0,0,240,207,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,247,255,253,33,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,255,255,255,255,127,0,0,240,0,248,0,0,0,124,0,0,0,0,0,0,31, +252,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0, +0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0,0,60,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,247,63,0,0,0,128,0,0,0,0,0, +0,0,0,0,0,3,0,68,8,0,0,96,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,0,0,0,255, +255,3,0,0,0,0,0,192,63,0,0,128,255,3,0,0,0,0,0,7,0,0,0,0,0,200,19,0,0,0,0,0,0, +0,0,0,0,0,0,0,126,102,0,8,16,0,0,0,0,0,0,0,0,0,0,0,0,157,193,2,0,0,0,0,48,64, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,33,0,0,0,0,0,64, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,127,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,32,110,240,0,0,0,0,0,135,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,255,127,0,0,0,0,0,0,0,3,0,0,0,0,0,120,38,0,0, +0,0,0,0,0,0,7,0,0,0,128,239,31,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,192,127,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,40,191,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,128,3,248,255,231,15,0,0,0,60,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, diff --git a/src/ctype/wcwidth.c b/src/ctype/wcwidth.c index eaafd7cb..ab05cfec 100644 --- a/src/ctype/wcwidth.c +++ b/src/ctype/wcwidth.c @@ -1,186 +1,29 @@ -#include -#include #include -#define R(a,b,w) { (b), (w)/2, (b)-(a) } - -static const struct range { - uint32_t base:20; - uint32_t width:1; - uint32_t len:11; -} ranges[] = { - R(0x0300, 0x036F, 0), - R(0x0483, 0x0486, 0), - R(0x0488, 0x0489, 0), - R(0x0591, 0x05BD, 0), - R(0x05BF, 0x05BF, 0), - R(0x05C1, 0x05C2, 0), - R(0x05C4, 0x05C5, 0), - R(0x05C7, 0x05C7, 0), - R(0x0600, 0x0603, 0), - R(0x0610, 0x0615, 0), - R(0x064B, 0x065E, 0), - R(0x0670, 0x0670, 0), - R(0x06D6, 0x06E4, 0), - R(0x06E7, 0x06E8, 0), - R(0x06EA, 0x06ED, 0), - R(0x070F, 0x070F, 0), - R(0x0711, 0x0711, 0), - R(0x0730, 0x074A, 0), - R(0x07A6, 0x07B0, 0), - R(0x07EB, 0x07F3, 0), - R(0x0901, 0x0902, 0), 
- R(0x093C, 0x093C, 0), - R(0x0941, 0x0948, 0), - R(0x094D, 0x094D, 0), - R(0x0951, 0x0954, 0), - R(0x0962, 0x0963, 0), - R(0x0981, 0x0981, 0), - R(0x09BC, 0x09BC, 0), - R(0x09C1, 0x09C4, 0), - R(0x09CD, 0x09CD, 0), - R(0x09E2, 0x09E3, 0), - R(0x0A01, 0x0A02, 0), - R(0x0A3C, 0x0A3C, 0), - R(0x0A41, 0x0A42, 0), - R(0x0A47, 0x0A48, 0), - R(0x0A4B, 0x0A4D, 0), - R(0x0A70, 0x0A71, 0), - R(0x0A81, 0x0A82, 0), - R(0x0ABC, 0x0ABC, 0), - R(0x0AC1, 0x0AC5, 0), - R(0x0AC7, 0x0AC8, 0), - R(0x0ACD, 0x0ACD, 0), - R(0x0AE2, 0x0AE3, 0), - R(0x0B01, 0x0B01, 0), - R(0x0B3C, 0x0B3C, 0), - R(0x0B3F, 0x0B3F, 0), - R(0x0B41, 0x0B43, 0), - R(0x0B4D, 0x0B4D, 0), - R(0x0B56, 0x0B56, 0), - R(0x0B82, 0x0B82, 0), - R(0x0BC0, 0x0BC0, 0), - R(0x0BCD, 0x0BCD, 0), - R(0x0C3E, 0x0C40, 0), - R(0x0C46, 0x0C48, 0), - R(0x0C4A, 0x0C4D, 0), - R(0x0C55, 0x0C56, 0), - R(0x0CBC, 0x0CBC, 0), - R(0x0CBF, 0x0CBF, 0), - R(0x0CC6, 0x0CC6, 0), - R(0x0CCC, 0x0CCD, 0), - R(0x0CE2, 0x0CE3, 0), - R(0x0D41, 0x0D43, 0), - R(0x0D4D, 0x0D4D, 0), - R(0x0DCA, 0x0DCA, 0), - R(0x0DD2, 0x0DD4, 0), - R(0x0DD6, 0x0DD6, 0), - R(0x0E31, 0x0E31, 0), - R(0x0E34, 0x0E3A, 0), - R(0x0E47, 0x0E4E, 0), - R(0x0EB1, 0x0EB1, 0), - R(0x0EB4, 0x0EB9, 0), - R(0x0EBB, 0x0EBC, 0), - R(0x0EC8, 0x0ECD, 0), - R(0x0F18, 0x0F19, 0), - R(0x0F35, 0x0F35, 0), - R(0x0F37, 0x0F37, 0), - R(0x0F39, 0x0F39, 0), - R(0x0F71, 0x0F7E, 0), - R(0x0F80, 0x0F84, 0), - R(0x0F86, 0x0F87, 0), - R(0x0F90, 0x0F97, 0), - R(0x0F99, 0x0FBC, 0), - R(0x0FC6, 0x0FC6, 0), - R(0x102D, 0x1030, 0), - R(0x1032, 0x1032, 0), - R(0x1036, 0x1037, 0), - R(0x1039, 0x1039, 0), - R(0x1058, 0x1059, 0), - R(0x1100, 0x115F, 2), - R(0x1160, 0x11FF, 0), - R(0x135F, 0x135F, 0), - R(0x1712, 0x1714, 0), - R(0x1732, 0x1734, 0), - R(0x1752, 0x1753, 0), - R(0x1772, 0x1773, 0), - R(0x17B4, 0x17B5, 0), - R(0x17B7, 0x17BD, 0), - R(0x17C6, 0x17C6, 0), - R(0x17C9, 0x17D3, 0), - R(0x17DD, 0x17DD, 0), - R(0x180B, 0x180D, 0), - R(0x18A9, 0x18A9, 0), - R(0x1920, 0x1922, 0), - R(0x1927, 0x1928, 0), - 
R(0x1932, 0x1932, 0), - R(0x1939, 0x193B, 0), - R(0x1A17, 0x1A18, 0), - R(0x1B00, 0x1B03, 0), - R(0x1B34, 0x1B34, 0), - R(0x1B36, 0x1B3A, 0), - R(0x1B3C, 0x1B3C, 0), - R(0x1B42, 0x1B42, 0), - R(0x1B6B, 0x1B73, 0), - R(0x1DC0, 0x1DCA, 0), - R(0x1DFE, 0x1DFF, 0), - R(0x200B, 0x200F, 0), - R(0x202A, 0x202E, 0), - R(0x2060, 0x2063, 0), - R(0x206A, 0x206F, 0), - R(0x20D0, 0x20EF, 0), - R(0x2329, 0x232A, 2), - R(0x2E80, 0x3029, 2), - R(0x302A, 0x302F, 0), - R(0x3030, 0x303E, 2), - R(0x3099, 0x309A, 0), - R(0xA806, 0xA806, 0), - R(0xA80B, 0xA80B, 0), - R(0xA825, 0xA826, 0), - R(0xF900, 0xFAFF, 2), - R(0xFB1E, 0xFB1E, 0), - R(0xFE00, 0xFE0F, 0), - R(0xFE20, 0xFE23, 0), - R(0xFE30, 0xFE6F, 2), - R(0xFEFF, 0xFEFF, 0), - R(0xFF00, 0xFF60, 2), - R(0xFFE0, 0xFFE6, 2), - R(0x10A01, 0x10A03, 0), - R(0x10A05, 0x10A06, 0), - R(0x10A0C, 0x10A0F, 0), - R(0x10A38, 0x10A3A, 0), - R(0x10A3F, 0x10A3F, 0), - R(0x1D167, 0x1D169, 0), - R(0x1D173, 0x1D182, 0), - R(0x1D185, 0x1D18B, 0), - R(0x1D1AA, 0x1D1AD, 0), - R(0x1D242, 0x1D244, 0), - R(0xE0001, 0xE0001, 0), - R(0xE0020, 0xE007F, 0), - R(0xE0100, 0xE01EF, 0), +static unsigned char table[] = { +#include "nonspacing.h" }; -/* Note: because the len field is only 10 bits, we must special-case - * the two huge ranges of full width characters and exclude them - * from the binary search table. */ +static unsigned char wtable[] = { +#include "wide.h" +}; -int wcwidth(wchar_t wc) +int wcwidth(wint_t wc) { - int a, n; - uint32_t c = wc; - - if (c-0x20 < 0x5f) return 1; - if (!iswprint(c)) return wc ? -1 : 0; - if (c-0x20000 < 0x20000) return 2; - - /* The following code is a branchless binary search. */ - a = 0; - n = sizeof ranges / sizeof ranges[0]; - do { - n >>= 1; - a += n+1 & (signed)(ranges[a+n].base-c)>>31; - } while (n); - if (ranges[a].base-c <= ranges[a].len) - return 2*ranges[a].width; - return 1 + (c-0x3040 < 0xd800-0x3040); + if (wc < 0xffU) + return (wc+1 & 0x7f) >= 0x21 ? 1 : wc ? 
-1 : 0; + if ((wc & 0xfffeffffU) < 0xfffe) { + if ((table[table[wc>>8]*32+((wc&255)>>3)]>>(wc&7))&1) + return 0; + if ((wtable[wtable[wc>>8]*32+((wc&255)>>3)]>>(wc&7))&1) + return 2; + return 1; + } + if ((wc & 0xfffe) == 0xfffe) + return -1; + if (wc-0x20000U < 0x20000) + return 2; + if (wc == 0xe0001 || wc-0xe0020U < 0x5f || wc-0xe0100 < 0xef) + return 0; + return 1; } diff --git a/src/ctype/wide.h b/src/ctype/wide.h new file mode 100644 index 00000000..d90e3fff --- /dev/null +++ b/src/ctype/wide.h @@ -0,0 +1,41 @@ +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,18,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,19,16,16,16,16,16,16,16,16,16,16,20,21,22,23,24,17, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,25, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,17,26,16,16,16,16,27,16,16,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, +17,17,17,17,17,17,17,28,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,16,16,16,29,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,30,16,16,16,16,16,16,16,16,16, +16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, 
+16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +16,16,16,16,31,16,16,16,16,16,16,16,16,16,16,16,16,16,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0, +0,248,0,0,0,0,0,0,0,0,0,0,252,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,251,255,255,255, +255,255,255,255,255,255,255,15,0,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,63,0,0,0,255,15, +254,255,255,255,255,255,255,127,254,255,255,255,255,255,255,255,255,255,127, +254,255,255,255,255,255,255,255,255,255,255,255,255,224,255,255,255,255,63, +254,255,255,255,255,255,255,255,255,255,255,127,255,255,255,255,255,7,255,255, +255,255,15,0,255,255,255,255,255,127,255,255,255,255,255,0,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,127,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,31,255,255,255,255,255,255,127,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,255,255,255,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,15,0,255, +255,127,248,255,255,255,255,255,15,0,0,255,3,0,0,255,255,255,255,247,255,127, +15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,255,255,255,255,255,7,255,1,3,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,