void FAST_FUNC init_unicode(void)
{
- /* In unicode, this is a one character string */
static const char unicode_0x394[] = { 0xce, 0x94, 0 };
+ size_t width; /* wide-character count reported by mbstowcs() below */
if (unicode_status != UNICODE_UNKNOWN)
return;
-
- unicode_status = unicode_strlen(unicode_0x394) == 1 ? UNICODE_ON : UNICODE_OFF;
+ /* In unicode, unicode_0x394 (bytes 0xce 0x94) is a one character string */
+// unicode_strlen(unicode_0x394) would work here too, but unicode_strlen() is otherwise unused
+ width = mbstowcs(NULL, unicode_0x394, INT_MAX); /* NULL destination: only count, store nothing */
+ unicode_status = (width == 1 ? UNICODE_ON : UNICODE_OFF); /* any other result, incl. mbstowcs() failure, selects OFF */
}
#else
}
-# if LAST_SUPPORTED_WCHAR >= 0x300
+# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
struct interval {
uint16_t first;
uint16_t last;
* This implementation assumes that wchar_t characters are encoded
* in ISO 10646.
*/
-static int wcwidth(unsigned ucs)
+int FAST_FUNC wcwidth(unsigned ucs) /* was "static int": this change drops static and adds FAST_FUNC */
{
-# if LAST_SUPPORTED_WCHAR >= 0x300
+# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300 /* combining tables are compiled only for configs reaching 0x300 */
/* sorted list of non-overlapping intervals of non-spacing characters */
/* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
- static const struct interval combining[] = {
# define BIG_(a,b) { a, b },
# define PAIR(a,b)
# define ARRAY /* PAIR if < 0x4000 and no more than 4 chars big */ \
BIG_(0xFE20, 0xFE23) \
BIG_(0xFEFF, 0xFEFF) \
BIG_(0xFFF9, 0xFFFB)
- ARRAY
+ static const struct interval combining[] = { ARRAY }; /* PAIR() is empty here, so only BIG_() ranges land in this table */
# undef BIG_
# undef PAIR
- };
# define BIG_(a,b)
# define PAIR(a,b) (a << 2) | (b-a),
static const uint16_t combining1[] = { ARRAY };
if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
return -1;
/* Quick abort if it is an obviously invalid char */
- if (ucs > LAST_SUPPORTED_WCHAR)
+ if (ucs > CONFIG_LAST_SUPPORTED_WCHAR)
return -1;
/* Optimization: no combining chars below 0x300 */
- if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
+ if (CONFIG_LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
return 1;
-# if LAST_SUPPORTED_WCHAR >= 0x300
+# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300 /* same condition that guards the combining tables above */
/* Binary search in table of non-spacing characters */
if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
return 0;
return 0;
/* Optimization: all chars below 0x1100 are not double-width */
- if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
+ if (CONFIG_LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
return 1;
-# if LAST_SUPPORTED_WCHAR >= 0x1100
+# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x1100 /* checks below are compiled only for configs reaching 0x1100 */
/* Invalid code points: */
/* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
/* Private Use Area (e000..f8ff) */
/* Noncharacters fdd0..fdef */
- if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
- || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
+ if ((CONFIG_LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
+ || (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
) {
return -1;
}
/* 0xfffe and 0xffff in every plane are invalid */
- if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
+ if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
return -1;
}
-# if LAST_SUPPORTED_WCHAR >= 0x10000
+# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x10000 /* supplementary-plane handling is compiled only for configs reaching 0x10000 */
if (ucs >= 0x10000) {
/* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
static const struct interval combining0x10000[] = {
if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
return 0;
/* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
- if (LAST_SUPPORTED_WCHAR >= 0xE0001
+ if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xE0001 /* config limit tested first, before the per-character ranges */
&& ( ucs == 0xE0001
|| (ucs >= 0xE0020 && ucs <= 0xE007F)
|| (ucs >= 0xE0100 && ucs <= 0xE01EF)
|| ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
|| ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */
|| (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
-# if LAST_SUPPORTED_WCHAR >= 0xac00
+# if CONFIG_LAST_SUPPORTED_WCHAR >= 0xac00 /* the ranges below start at 0xac00 */
|| (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
|| (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
|| (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */
* http://www.unicode.org/Public/5.2.0/ucd/extracted/DerivedBidiClass.txt
* Bidi_Class=Left_To_Right | Bidi_Class=Arabic_Letter
*/
- static const struct interval rtl_b[] = {
# define BIG_(a,b) { a, b },
# define PAIR(a,b)
# define ARRAY \
{0x10E7F, 0x10FFF},
{0x1E800, 0x1EFFF}
*/
- ARRAY
+ static const struct interval rtl_b[] = { ARRAY }; /* PAIR() is empty here, so only BIG_() ranges land in this table */
# undef BIG_
# undef PAIR
- };
# define BIG_(a,b)
# define PAIR(a,b) (a << 2) | (b-a),
static const uint16_t rtl_p[] = { ARRAY };
* White_Space, Other_Neutral, European_Number, European_Separator,
* European_Terminator, Arabic_Number, Common_Separator
*/
- static const struct interval neutral_b[] = {
# define BIG_(a,b) { a, b },
# define PAIR(a,b)
# define ARRAY \
{0x1F030, 0x1F093},
{0x1F100, 0x1F10A}
*/
- ARRAY
+ static const struct interval neutral_b[] = { ARRAY }; /* PAIR() is empty here, so only BIG_() ranges land in this table */
# undef BIG_
# undef PAIR
- };
# define BIG_(a,b)
# define PAIR(a,b) (a << 2) | (b-a),
static const uint16_t neutral_p[] = { ARRAY };
/* The rest is mostly same for libc and for "homegrown" support */
+#if 0 // UNUSED: init_unicode() calls mbstowcs() directly
size_t FAST_FUNC unicode_strlen(const char *string)
{
size_t width = mbstowcs(NULL, string, INT_MAX);
return strlen(string);
return width;
}
+#endif /* 0 (unicode_strlen is unused) */
+
+size_t FAST_FUNC unicode_strwidth(const char *string) /* returns the unicode_width that printable_string() reports for string */
+{
+ uni_stat_t uni_stat; /* passed by address to printable_string() below */
+ printable_string(&uni_stat, string); /* called only for the stats; any return value is ignored */
+ return uni_stat.unicode_width;
+}
static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
{