From e7227322b3106e9fa8447410eb09d4a15b4d4b8b Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Wed, 2 Jan 2002 11:06:02 +0000 Subject: [PATCH] Allow 8-bit characters. This is not really complete; it only marks characters with the highest bit set as HIGHBIT. We need to expand this to support the UTF-8 character set properly. However, this solves the problem that the character 0x80 (which is common in UTF-8) gets masked to 0x00. Patch submitted by "Huang Yuzhen" --- crypto/conf/conf_def.h | 143 +++++++++++++++++++++++++---------------- crypto/conf/keysets.pl | 62 ++++++++++-------- 2 files changed, 123 insertions(+), 82 deletions(-) diff --git a/crypto/conf/conf_def.h b/crypto/conf/conf_def.h index 3244d9a331..92a7d8ad77 100644 --- a/crypto/conf/conf_def.h +++ b/crypto/conf/conf_def.h @@ -71,6 +71,7 @@ #define CONF_COMMENT 128 #define CONF_FCOMMENT 2048 #define CONF_EOF 8 +#define CONF_HIGHBIT 4096 #define CONF_ALPHA (CONF_UPPER|CONF_LOWER) #define CONF_ALPHA_NUMERIC (CONF_ALPHA|CONF_NUMBER|CONF_UNDER) #define CONF_ALPHA_NUMERIC_PUNCT (CONF_ALPHA|CONF_NUMBER|CONF_UNDER| \ @@ -78,68 +79,102 @@ #define KEYTYPES(c) ((unsigned short *)((c)->meth_data)) #ifndef CHARSET_EBCDIC -#define IS_COMMENT(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_COMMENT) -#define IS_FCOMMENT(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_FCOMMENT) -#define IS_EOF(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_EOF) -#define IS_ESC(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_ESC) -#define IS_NUMBER(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_NUMBER) -#define IS_WS(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_WS) -#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_ALPHA_NUMERIC) +#define IS_COMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_COMMENT) +#define IS_FCOMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_FCOMMENT) +#define IS_EOF(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_EOF) +#define IS_ESC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ESC) +#define IS_NUMBER(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_NUMBER) +#define IS_WS(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_WS) +#define IS_ALPHA_NUMERIC(c,a) 
(KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC) #define IS_ALPHA_NUMERIC_PUNCT(c,a) \ - (KEYTYPES(c)[(a)&0x7f]&CONF_ALPHA_NUMERIC_PUNCT) -#define IS_QUOTE(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_QUOTE) -#define IS_DQUOTE(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_DQUOTE) + (KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC_PUNCT) +#define IS_QUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_QUOTE) +#define IS_DQUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_DQUOTE) +#define IS_HIGHBIT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_HIGHBIT) #else /*CHARSET_EBCDIC*/ -#define IS_COMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_COMMENT) -#define IS_FCOMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_FCOMMENT) -#define IS_EOF(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_EOF) -#define IS_ESC(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ESC) -#define IS_NUMBER(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_NUMBER) -#define IS_WS(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_WS) -#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ALPHA_NUMERIC) +#define IS_COMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_COMMENT) +#define IS_FCOMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_FCOMMENT) +#define IS_EOF(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_EOF) +#define IS_ESC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ESC) +#define IS_NUMBER(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_NUMBER) +#define IS_WS(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_WS) +#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC) #define IS_ALPHA_NUMERIC_PUNCT(c,a) \ - (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ALPHA_NUMERIC_PUNCT) -#define IS_QUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_QUOTE) -#define IS_DQUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_DQUOTE) + (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC_PUNCT) +#define IS_QUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_QUOTE) +#define IS_DQUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_DQUOTE) +#define IS_HIGHBIT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_HIGHBIT) 
#endif /*CHARSET_EBCDIC*/ -static unsigned short CONF_type_default[128]={ - 0x008,0x000,0x000,0x000,0x000,0x000,0x000,0x000, - 0x000,0x010,0x010,0x000,0x000,0x010,0x000,0x000, - 0x000,0x000,0x000,0x000,0x000,0x000,0x000,0x000, - 0x000,0x000,0x000,0x000,0x000,0x000,0x000,0x000, - 0x010,0x200,0x040,0x080,0x000,0x200,0x200,0x040, - 0x000,0x000,0x200,0x200,0x200,0x200,0x200,0x200, - 0x001,0x001,0x001,0x001,0x001,0x001,0x001,0x001, - 0x001,0x001,0x000,0x200,0x000,0x000,0x000,0x200, - 0x200,0x002,0x002,0x002,0x002,0x002,0x002,0x002, - 0x002,0x002,0x002,0x002,0x002,0x002,0x002,0x002, - 0x002,0x002,0x002,0x002,0x002,0x002,0x002,0x002, - 0x002,0x002,0x002,0x000,0x020,0x000,0x200,0x100, - 0x040,0x004,0x004,0x004,0x004,0x004,0x004,0x004, - 0x004,0x004,0x004,0x004,0x004,0x004,0x004,0x004, - 0x004,0x004,0x004,0x004,0x004,0x004,0x004,0x004, - 0x004,0x004,0x004,0x000,0x200,0x000,0x200,0x000, +static unsigned short CONF_type_default[256]={ + 0x0008,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0010,0x0010,0x0000,0x0000,0x0010,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0010,0x0200,0x0040,0x0080,0x0000,0x0200,0x0200,0x0040, + 0x0000,0x0000,0x0200,0x0200,0x0200,0x0200,0x0200,0x0200, + 0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001, + 0x0001,0x0001,0x0000,0x0200,0x0000,0x0000,0x0000,0x0200, + 0x0200,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002, + 0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002, + 0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002, + 0x0002,0x0002,0x0002,0x0000,0x0020,0x0000,0x0200,0x0100, + 0x0040,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004, + 0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004, + 0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004, + 0x0004,0x0004,0x0004,0x0000,0x0200,0x0000,0x0200,0x0000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, 
+ 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, }; -static unsigned short CONF_type_win32[128]={ - 0x008,0x000,0x000,0x000,0x000,0x000,0x000,0x000, - 0x000,0x010,0x010,0x000,0x000,0x010,0x000,0x000, - 0x000,0x000,0x000,0x000,0x000,0x000,0x000,0x000, - 0x000,0x000,0x000,0x000,0x000,0x000,0x000,0x000, - 0x010,0x200,0x400,0x000,0x000,0x200,0x200,0x000, - 0x000,0x000,0x200,0x200,0x200,0x200,0x200,0x200, - 0x001,0x001,0x001,0x001,0x001,0x001,0x001,0x001, - 0x001,0x001,0x000,0xA00,0x000,0x000,0x000,0x200, - 0x200,0x002,0x002,0x002,0x002,0x002,0x002,0x002, - 0x002,0x002,0x002,0x002,0x002,0x002,0x002,0x002, - 0x002,0x002,0x002,0x002,0x002,0x002,0x002,0x002, - 0x002,0x002,0x002,0x000,0x000,0x000,0x200,0x100, - 0x000,0x004,0x004,0x004,0x004,0x004,0x004,0x004, - 0x004,0x004,0x004,0x004,0x004,0x004,0x004,0x004, - 0x004,0x004,0x004,0x004,0x004,0x004,0x004,0x004, - 0x004,0x004,0x004,0x000,0x200,0x000,0x200,0x000, +static unsigned short CONF_type_win32[256]={ + 0x0008,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0010,0x0010,0x0000,0x0000,0x0010,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 
0x0010,0x0200,0x0400,0x0000,0x0000,0x0200,0x0200,0x0000, + 0x0000,0x0000,0x0200,0x0200,0x0200,0x0200,0x0200,0x0200, + 0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001, + 0x0001,0x0001,0x0000,0x0A00,0x0000,0x0000,0x0000,0x0200, + 0x0200,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002, + 0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002, + 0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002, + 0x0002,0x0002,0x0002,0x0000,0x0000,0x0000,0x0200,0x0100, + 0x0000,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004, + 0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004, + 0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004, + 0x0004,0x0004,0x0004,0x0000,0x0200,0x0000,0x0200,0x0000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, + 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000, }; diff --git a/crypto/conf/keysets.pl b/crypto/conf/keysets.pl index 56669e76ac..50ed67fa52 100644 --- a/crypto/conf/keysets.pl +++ b/crypto/conf/keysets.pl @@ -12,8 +12,9 @@ $DQUOTE=0x400; $COMMENT=0x80; $FCOMMENT=0x800; $EOF=0x08; +$HIGHBIT=0x1000; -foreach (0 .. 127) +foreach (0 .. 
255) { $v=0; $c=sprintf("%c",$_); @@ -27,11 +28,12 @@ foreach (0 .. 127) $v|=$QUOTE if ($c =~ /['`"]/); # for emacs: "`'}/) $v|=$COMMENT if ($c =~ /\#/); $v|=$EOF if ($c =~ /\0/); + $v|=$HIGHBIT if ($c =~/[\x80-\xff]/); push(@V_def,$v); } -foreach (0 .. 127) +foreach (0 .. 255) { $v=0; $c=sprintf("%c",$_); @@ -44,6 +46,7 @@ foreach (0 .. 127) $v|=$DQUOTE if ($c =~ /["]/); # for emacs: "}/) $v|=$FCOMMENT if ($c =~ /;/); $v|=$EOF if ($c =~ /\0/); + $v|=$HIGHBIT if ($c =~/[\x80-\xff]/); push(@V_w32,$v); } @@ -122,6 +125,7 @@ print <<"EOF"; #define CONF_COMMENT $COMMENT #define CONF_FCOMMENT $FCOMMENT #define CONF_EOF $EOF +#define CONF_HIGHBIT $HIGHBIT #define CONF_ALPHA (CONF_UPPER|CONF_LOWER) #define CONF_ALPHA_NUMERIC (CONF_ALPHA|CONF_NUMBER|CONF_UNDER) #define CONF_ALPHA_NUMERIC_PUNCT (CONF_ALPHA|CONF_NUMBER|CONF_UNDER| \\ @@ -129,51 +133,53 @@ print <<"EOF"; #define KEYTYPES(c) ((unsigned short *)((c)->meth_data)) #ifndef CHARSET_EBCDIC -#define IS_COMMENT(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_COMMENT) -#define IS_FCOMMENT(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_FCOMMENT) -#define IS_EOF(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_EOF) -#define IS_ESC(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_ESC) -#define IS_NUMBER(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_NUMBER) -#define IS_WS(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_WS) -#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_ALPHA_NUMERIC) +#define IS_COMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_COMMENT) +#define IS_FCOMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_FCOMMENT) +#define IS_EOF(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_EOF) +#define IS_ESC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ESC) +#define IS_NUMBER(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_NUMBER) +#define IS_WS(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_WS) +#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC) #define IS_ALPHA_NUMERIC_PUNCT(c,a) \\ - (KEYTYPES(c)[(a)&0x7f]&CONF_ALPHA_NUMERIC_PUNCT) -#define IS_QUOTE(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_QUOTE) -#define IS_DQUOTE(c,a) 
(KEYTYPES(c)[(a)&0x7f]&CONF_DQUOTE) + (KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC_PUNCT) +#define IS_QUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_QUOTE) +#define IS_DQUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_DQUOTE) +#define IS_HIGHBIT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_HIGHBIT) #else /*CHARSET_EBCDIC*/ -#define IS_COMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_COMMENT) -#define IS_FCOMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_FCOMMENT) -#define IS_EOF(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_EOF) -#define IS_ESC(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ESC) -#define IS_NUMBER(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_NUMBER) -#define IS_WS(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_WS) -#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ALPHA_NUMERIC) +#define IS_COMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_COMMENT) +#define IS_FCOMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_FCOMMENT) +#define IS_EOF(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_EOF) +#define IS_ESC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ESC) +#define IS_NUMBER(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_NUMBER) +#define IS_WS(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_WS) +#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC) #define IS_ALPHA_NUMERIC_PUNCT(c,a) \\ - (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ALPHA_NUMERIC_PUNCT) -#define IS_QUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_QUOTE) -#define IS_DQUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_DQUOTE) + (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC_PUNCT) +#define IS_QUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_QUOTE) +#define IS_DQUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_DQUOTE) +#define IS_HIGHBIT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_HIGHBIT) #endif /*CHARSET_EBCDIC*/ EOF -print "static unsigned short CONF_type_default[128]={"; +print "static unsigned short CONF_type_default[256]={"; -for ($i=0; $i<128; $i++) +for ($i=0; $i<256; $i++) { print "\n\t" if ($i 
% 8) == 0; - printf "0x%03X,",$V_def[$i]; + printf "0x%04X,",$V_def[$i]; } print "\n\t};\n\n"; -print "static unsigned short CONF_type_win32[128]={"; +print "static unsigned short CONF_type_win32[256]={"; -for ($i=0; $i<128; $i++) +for ($i=0; $i<256; $i++) { print "\n\t" if ($i % 8) == 0; - printf "0x%03X,",$V_w32[$i]; + printf "0x%04X,",$V_w32[$i]; } print "\n\t};\n\n"; -- 2.25.1