2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these librararies and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 static char sccsid[] = "$TOG: RFCMIME.c /main/11 1999/06/30 12:08:55 mgreess $";
25 * COMPONENT_NAME: desktop
28 * DtXlateOpToStdLocale
29 * DtXlateStdToOpLocale
45 * OBJECT CODE ONLY SOURCE MATERIALS
59 /* Iconv not defined for linux. Use the EUSCompat stubs instead. */
63 #include <EUSCompat.h>
65 #include <LocaleXlate.h>
67 #ifdef ICONV_INBUF_CONST
68 # define ICONV_INBUF_TYPE const char **
70 # define ICONV_INBUF_TYPE char **
73 #define WORKSIZE 1024*10
75 * The following escape sequence is defined as "To ASCII".
76 * But is it correct regardless of ISO-2022-XX ???
79 static char ToASCII[ToASCII_NUM] = { 0x1b, 0x28, 0x42 };
82 * _i18nwork1[] is used to convert the passed string with CD iconv.
86 static void *_i18nwork1 = NULL;
87 static unsigned long _i18nsize1 = 0;
88 static int shouldAlloc1 = ~0;
91 * _i18nwork2[] is used to convert the passed string with CD iconv.
95 static void *_i18nwork2 = NULL;
96 static unsigned long _i18nsize2 = 0;
97 static int shouldAlloc2 = ~0;
100 * Forward declarations
102 extern void writeBase64(char * buf, const char * bp, const unsigned long len);
104 static const char *DfltStdCharset = "us-ascii";
105 static const char *DfltStdLang = "C";
107 static void crlf(char *buf)
112 /******************************************************************************
113 * Function: int DtXlateOpToStdLocale (char *operation, char *opLocale,
114 * char **ret_stdLocale, char **ret_stdLang, char **ret_stdSet)
117 * operation Operation associated with the locale value
118 * opLocale An operation-specific locale string
119 * ret_stdLocale Returns the std locale
120 * Caller must free this string.
121 * ret_stdLang Returns the std language & territory string.
122 * Caller must free this string.
123 * ret_stdSet Returns the std code set string.
124 * Caller must free this string.
128 * Purpose: Gets the standard locale given an operation and its locale
130 *****************************************************************************/
132 DtXlateOpToStdLocale (
135 char **ret_stdLocale,
139 _DtXlateDb MyDb = NULL;
140 char MyPlatform[_DtPLATFORM_MAX_LEN + 1];
144 if (_DtLcxOpenAllDbs(&MyDb) == 0 &&
145 _DtXlateGetXlateEnv(MyDb,MyPlatform,&ExecVer,&CompVer) != 0)
147 _DtLcxCloseDb(&MyDb);
154 (void) _DtLcxXlateOpToStd(MyDb, MyPlatform, ExecVer,
156 ret_stdLocale, ret_stdLang, ret_stdSet, NULL);
159 /* if failed, give default values */
160 if (ret_stdLocale != NULL && *ret_stdLocale == NULL)
162 *ret_stdLocale = (char *)malloc(strlen(DfltStdLang)+strlen(DfltStdCharset)+3);
163 sprintf(*ret_stdLocale,"%s.%s",DfltStdLang,DfltStdCharset);
165 if (ret_stdLang != NULL && *ret_stdLang == NULL)
166 *ret_stdLang = (char *)strdup(DfltStdLang);
167 if (ret_stdSet != NULL && *ret_stdSet == NULL)
168 *ret_stdSet = (char *)strdup(DfltStdCharset);
171 /******************************************************************************
172 * Function: int DtXlateStdToOpLocale ( char *operation, char *stdLocale,
173 * char *stdLang, char *stdCodeSet,
174 * char *dflt_opLocale, char **ret_opLocale)
177 * operation operation whose locale value will be retrieved
178 * stdLocale standard locale value
179 * stdLang standard Lang/Territory Value
180 * stdCodeSet standard CodeSet Value
181 * dflt_opLocale operation-specific locale-value
182 * This is the default value used in error case
183 * ret_opLocale operation-specific locale-value placed here
184 * Caller must free this string.
188 * Purpose: Gets an operation-specific locale string given the standard string
190 *****************************************************************************/
192 DtXlateStdToOpLocale (
200 _DtXlateDb MyDb = NULL;
201 char MyPlatform[_DtPLATFORM_MAX_LEN + 1];
205 if (_DtLcxOpenAllDbs(&MyDb) == 0 &&
206 _DtXlateGetXlateEnv(MyDb,MyPlatform,&ExecVer,&CompVer) != 0)
208 _DtLcxCloseDb(&MyDb);
215 (void) _DtLcxXlateStdToOp(MyDb, MyPlatform, ExecVer,
216 operation, stdLocale, stdLang, stdCodeSet, NULL,
220 /* if translation fails, use a default value */
221 if (ret_opLocale && *ret_opLocale == NULL)
223 if (dflt_opLocale) *ret_opLocale = (char *)strdup(dflt_opLocale);
224 else if (stdLocale) *ret_opLocale = (char *)strdup(stdLocale);
230 char *ret_locale = NULL;
231 char *ret_lang = NULL;
232 char *ret_codeset = NULL;
233 char *ret_target = NULL;
235 DtXlateOpToStdLocale(DtLCX_OPER_SETLOCALE,
236 setlocale(LC_CTYPE, NULL),
240 DtXlateStdToOpLocale(DtLCX_OPER_INTERCHANGE_CODESET,
246 DtXlateStdToOpLocale(DtLCX_OPER_MIME,
257 getCharSet(char * charset)
261 mimeCS = targetTagName();
264 strcpy(charset, mimeCS);
266 strcpy(charset, "us-ascii"); /* default MIME codeset */
272 md5PlainText(const char * bp, const unsigned long len, unsigned char * digest)
274 /* We need to compute the md5 signature based on a message that has
275 // the CRLF line terminator. Most of our buffers don't so we will need
276 // to scan the body and do some magic. The approach will be to sum
277 // one line at a time. If the buffer doesn't have CRLF we will do that
282 unsigned char * local_crlf = (unsigned char *)"\r\n";
283 const char * last = bp;
287 for (cur = bp; cur < (bp + len); cur++) {
289 if (cur == bp || *(cur - 1) == '\r') {
290 MD5Update(&context, (unsigned char *)last,
294 MD5Update(&context, (unsigned char *)last,
296 MD5Update(&context, local_crlf, 2);
302 if (bp[len - 1] != '\n') {
303 /* Need to sum the trailing fraction with a CRLF. */
304 MD5Update(&context, (unsigned char *)last,
306 MD5Update(&context, local_crlf, 2);
309 MD5Final(digest, &context);
313 static void _converter_( iconv_t CD,
314 void *from, unsigned long from_len,
315 void **to, unsigned long *to_len )
320 size_t OutBytesLeft = 0;
321 size_t _OutBytesLeft = 0;
323 size_t converted_num = 0;
329 if ( shouldAlloc1 ) {
330 /* Obtain work area */
331 _i18nwork1 = (size_t *)malloc( WORKSIZE );
336 _i18nsize1 = WORKSIZE;
340 InBuf = (char *)from;
341 InBytesLeft = from_len;
342 OutBytesLeft = _i18nsize1;
343 OutBuf = (char *)_i18nwork1;
346 * Reset the iconv conversion state to its initial state by
347 * calling iconv() with a NULL inbuf.
349 iconv( CD, (ICONV_INBUF_TYPE)NULL, 0, NULL, 0 );
354 * +----------------------------+
356 * +----------------------------+
357 * <-------------------------->
366 * +----------------------------+
368 * +----------------------------+
369 * <-------------------------->
373 iconv_ret = iconv( CD, (ICONV_INBUF_TYPE)&InBuf, &InBytesLeft,
374 &OutBuf, &OutBytesLeft );
375 if ( iconv_ret == 0 ) {
379 * +----------------------------+
380 * |XXXXXXXXXXXXXXXXXXXXXXXXXXXX|
381 * +----------------------------+
388 * +----------------------------+
389 * |XXXXXXXXXXXXXXXXX| | | |
390 * +----------------------------+
391 * <---------------> <-------->
392 * converted_num OutBytesLeft
394 converted_num = (unsigned long)((char *)OutBuf-(char *)_i18nwork1);
395 *to = (void *)_i18nwork1;
396 *to_len = (unsigned long)converted_num;
399 if ( errno == E2BIG ) {
400 /* Overflow: input data is still left to convert.
403 * +----------------------------+
404 * |XXXXXXXXXXXXXX| | | |
405 * +----------------------------+
412 * +----------------------------+
413 * |XXXXXXXXXXXXXXXXXXXXXXXXXXX |
414 * +----------------------------+
415 * <------------------------->
416 * converted_num OutBytesLeft=?
420 /* Check how many bytes have been converted already.
423 (unsigned long)((char *)OutBuf - (char *)_i18nwork1);
424 _i18nsize1 += WORKSIZE;
425 _p = realloc( _i18nwork1, _i18nsize1 );
436 OutBuf = (char *)((char*)_i18nwork1 + converted_num);
437 OutBytesLeft += WORKSIZE;
450 CvtStr( char *charSet, void *from, unsigned long from_len,
451 void **to, unsigned long *to_len, Direction dir )
453 char *ret_locale = NULL;
454 char *ret_lang = NULL;
455 char *ret_codeset = NULL;
456 char *from_codeset = NULL;
457 char *to_codeset = NULL;
458 char *CuStdCodeSet = NULL;
459 char *InterChCodeSet = NULL;
460 char *StdCodeSet = NULL;
463 int isStopASCII = ~0;
464 unsigned long converted_num = 0;
466 /* Get CuStdCodeSet */
467 DtXlateOpToStdLocale( DtLCX_OPER_SETLOCALE,
468 setlocale( LC_CTYPE, NULL ),
474 * If charSet is NULL, it means the passed string's charset in *from is
475 * unknown by dtmail. In this case, this converter assumes that
476 * when dir = CURRENT_TO_INTERNET,
477 * *from's encoding is the current locale's one.
478 * when dir = INTERNET_TO_CURRENT,
479 * *from's encoding is the current locale's Internet Message's one.
482 * dtmail is running under ja_JP locale.
483 * dir : CURRENT_TO_INTERNET
486 * dir : INTERNET_TO_CURRENT
487 * *from = ISO-2022-JP
491 * ISO-2022-JP can be converted to either EUC-JP or IBM-932 practically.
492 * But the current AIX.lcx says
493 * StdCodeSet InterchangeCodeset
494 * EUC-JP <--> ISO-2022-JP
495 * IBM-932 ---> ISO-2022-JP
496 * HP-SJIS ---> ISO-2022-JP
497 * HP-KANA8 ---> ISO-2022-JP
498 * therefore DtXlateOpToStdLocale() can convert ISO-2022-JP to EUC-JP only.
499 * To fix this, we hard-coded this situation with the CDE Standard Name
501 * ???? Is it correct ???
503 if ( dir == INTERNET_TO_CURRENT ) {
505 * As for from_codeset
507 if ( ( charSet == NULL ) || ( *charSet == '\0' ) ) {
508 /* Convert CuStdCodeSet to StdInterChangeCodeSet */
509 DtXlateStdToOpLocale( DtLCX_OPER_INTERCHANGE_CODESET,
516 /* Convert charSet to StdInterChangeCodeSet */
517 ret_locale = ret_lang = ret_codeset = NULL;
518 DtXlateOpToStdLocale( DtLCX_OPER_MIME,
525 /* Convert StdInterChangeCodeSet to OpICONV3 codeset */
526 DtXlateStdToOpLocale( DtLCX_OPER_ICONV3,
535 if ( ( charSet == NULL ) || ( *charSet == '\0' ) ) {
536 /* Convert CuStdCodeSet to OpICONV3 codeset */
537 DtXlateStdToOpLocale( DtLCX_OPER_ICONV3,
545 if ( (!strncasecmp(InterChCodeSet,"ISO-2022-JP",11) &&
546 !strncasecmp(CuStdCodeSet,"IBM-932",7) ) ||
547 (!strncasecmp(InterChCodeSet,"ISO-2022-JP",11) &&
548 !strncasecmp(CuStdCodeSet,"EUC-JP",6) ) ) {
549 ret_codeset = CuStdCodeSet;
553 /* Convert InterChCodeSet to StdCodeSet */
554 ret_locale = ret_lang = ret_codeset = NULL;
555 DtXlateOpToStdLocale( DtLCX_OPER_INTERCHANGE_CODESET,
561 DtXlateStdToOpLocale( DtLCX_OPER_ICONV3,
568 } else { /* dir == CURRENT_TO_INTERNET */
570 * As for from_codeset
572 if ( ( charSet == NULL ) || ( *charSet == '\0' ) ) {
573 /* Convert CuStdCodeSet to OpICONV3 codeset */
574 DtXlateStdToOpLocale( DtLCX_OPER_ICONV3,
581 /* Convert charSet to StdInterChangeCodeSet */
582 ret_locale = ret_lang = ret_codeset = NULL;
583 DtXlateOpToStdLocale( DtLCX_OPER_MIME,
588 /* Convert StdInterChangeCodeSet to OpICONV3 codeset */
589 DtXlateStdToOpLocale( DtLCX_OPER_ICONV3,
599 if ( ( charSet == NULL ) || ( *charSet == '\0' ) ) {
600 /* Convert CuStdCodeSet to StdInterChangeCodeSet */
601 DtXlateStdToOpLocale( DtLCX_OPER_INTERCHANGE_CODESET,
608 /* Convert charSet to StdInterChangeCodeSet */
609 ret_locale = ret_lang = ret_codeset = NULL;
610 DtXlateOpToStdLocale( DtLCX_OPER_MIME,
617 /* Convert StdInterChangeCodeSet to OpICONV3 codeset */
618 DtXlateStdToOpLocale( DtLCX_OPER_ICONV3,
629 if ( shouldAlloc2 ) {
630 /* Obtain work area */
631 _i18nwork2 = (size_t *)malloc( WORKSIZE );
636 _i18nsize2 = WORKSIZE;
640 if (NULL == to_codeset || NULL == from_codeset)
643 if ( ( CD = iconv_open( to_codeset, from_codeset ) ) != (iconv_t)-1 ) {
645 * According to several RFCs( 822, 1468, 1557, ..... )
646 * the escape sequence to switch to ASCII is needed just before
647 * '\n'. IBM-eucJP/IBM-932 <--> fold 7 does while the other doesn't.
648 * Therefore CvtStr() does take care of this here.
650 if ( dir == INTERNET_TO_CURRENT ) {
651 _converter_( CD, from, from_len, to, to_len );
653 void *new_from = from;
654 unsigned long new_from_len = from_len;
655 unsigned long _passed = 0;
658 unsigned long _tmp_len = 0;
660 while ( _passed < from_len ) {
664 for ( ; _passed < from_len; _passed += clen ) {
665 clen = mblen(&(((char *)from)[_passed]), MB_CUR_MAX);
669 if ( ( clen > 1 ) || !isascii( ((char*)from)[_passed] ) ){
670 /* Here, maybe MB or non-ASCII */
674 if ( ( ((char*)from)[_passed] != '\n' ) &&
675 ( ((char*)from)[_passed] != '\0' ) ) {
680 if ( ((char*)from)[_passed] == '\n' ||
681 ((char*)from)[_passed] == '\0' )
684 new_from_len = &(((char *)from)[_passed])-(char *)new_from;
685 if ( ( _passed < from_len ) && ( clen == 1 ) &&
686 ( ((char*)from)[_passed] == '\n' ) ) {
691 * new_from from[_passed]
693 * +------------------------+--+------.................+
695 * +------------------------+--+------.................+
696 * <-------------------------> $
697 * new_from_len next new_from
698 * <------------------------------------------------->
704 * ********** DO 1 LINE CONVERSION **********
706 _tmp = NULL; _tmp_len = 0;
707 _converter_( CD, new_from, new_from_len, &_tmp, &_tmp_len );
709 if ( ( _tmp == NULL ) && ( _tmp_len == 0 ) ) {
710 /* Conversion fail */
719 * +-----------------------+ +-------------+
721 * +-----------------------+ +-------------+
724 * <---------------------> <----------->
727 /* Append _tmp to target */
728 if ( converted_num + _tmp_len > _i18nsize2 ) {
729 /* Need more memory. */
732 _i18nsize2 += WORKSIZE;
733 _i18n = realloc( _i18nwork2, _i18nsize2 );
749 * +---------------------------+
750 * |XXXXXXXXXXX(COPIED)XX |
751 * +---------------------------+
752 * <---------><-------->
753 * (old)conv. _tmp_len
754 * <------------------->
756 * <-------------------------->
759 strncpy( (char *)_i18nwork2 + converted_num,
760 (char *)_tmp, _tmp_len );
761 converted_num += _tmp_len;
763 *to = (void *)_i18nwork2;
764 *to_len = converted_num;
765 new_from = &(((char *)from)[_passed]) + 1;
769 * According to RFC1468, if a line ends with a non-ASCII
770 * char and has not switched back to ASCII before the end of the line,
771 * we must switch to ASCII just before the end of the line.
775 * +---------------------------+ +------+
776 * |XXXXXXXXXXXXXXXXXXXXX | + |1b2842|
777 * +---------------------------+ +------+
778 * <-------------------> <---->
779 * converted_num ToASCII_NUM
780 * <------------------------->
786 if ( !isStopASCII ) {
787 if ( (((char *)_i18nwork2)[converted_num-1] == '\n') ||
788 (((char *)_i18nwork2)[converted_num-1] == '\0') ) {
790 char _p = ((char *)_i18nwork2)[converted_num-1];
792 if (!((converted_num >=3+1 ) &&
793 !memcmp((void *)((char *)_i18nwork2+converted_num-3-1),
794 ToASCII, ToASCII_NUM )) ){
795 /* if not ToASCII there, then */
798 if ( converted_num + ToASCII_NUM > _i18nsize2 ) {
799 /* Need more memory. */
802 _i18nsize2 += WORKSIZE;
803 _i18n=realloc(_i18nwork2,_i18nsize2);
813 strncpy( (char *)_i18nwork2+converted_num-1,
814 ToASCII, ToASCII_NUM );
815 converted_num += ToASCII_NUM;
816 ((char *)_i18nwork2)[converted_num-1] = _p;
818 *to_len = converted_num;
822 if (!((converted_num >=3 ) &&
823 !memcmp((void *)((char *)_i18nwork2+converted_num-3),
824 ToASCII, ToASCII_NUM )) ){
826 // if not ToASCII there, then
829 if ( converted_num + ToASCII_NUM > _i18nsize2 ) {
832 _i18nsize2 += WORKSIZE;
833 _i18n=realloc(_i18nwork2,_i18nsize2);
843 strncpy( (char *)_i18nwork2+converted_num,
844 ToASCII, ToASCII_NUM );
845 converted_num += ToASCII_NUM;
847 *to_len = converted_num;
856 if( ( *to != NULL ) && ( *to_len != 0 ) ) {
857 if ( !isStopASCII ) {
858 if ( (((char *)_i18nwork2)[converted_num-1] == '\n') ||
859 (((char *)_i18nwork2)[converted_num-1] == '\0') ) {
861 char _p = ((char *)_i18nwork2)[converted_num-1];
863 if (!((converted_num >=3+1 ) &&
864 !memcmp((void *)((char *)_i18nwork2+converted_num-3-1),
865 ToASCII, ToASCII_NUM )) ){
866 /* if not ToASCII there, then
870 if ( converted_num + ToASCII_NUM > _i18nsize2 ) {
873 _i18nsize2 += WORKSIZE;
874 _i18n=realloc(_i18nwork2,_i18nsize2);
883 strncpy( (char *)_i18nwork2+converted_num-1,
884 ToASCII, ToASCII_NUM );
885 converted_num += ToASCII_NUM;
886 ((char *)_i18nwork2)[converted_num-1] = _p;
888 *to_len = converted_num;
892 if (!((converted_num >=3 ) &&
893 !memcmp((void *)((char *)_i18nwork2+converted_num-3),
894 ToASCII, ToASCII_NUM )) ){
895 /* if not ToASCII there, then
899 if ( converted_num + ToASCII_NUM > _i18nsize2 ) {
902 _i18nsize2 += WORKSIZE;
903 _i18n=realloc(_i18nwork2,_i18nsize2);
912 strncpy( (char *)_i18nwork2+converted_num,
913 ToASCII, ToASCII_NUM );
914 converted_num += ToASCII_NUM;
916 *to_len = converted_num;
923 * If InterChCodeSet is one of ISO-2022-JP/ISO-2022-KR/ISO-2022-TW/
924 * ISO-2022-CN, strip the MSB here since iconv'ed UDC has the MSB set to 1
926 if ( !strncasecmp( InterChCodeSet, "ISO-2022-JP", 11 ) ||
927 !strncasecmp( InterChCodeSet, "ISO-2022-KR", 11 ) ||
928 !strncasecmp( InterChCodeSet, "ISO-2022-TW", 11 ) ||
929 !strncasecmp( InterChCodeSet, "ISO-2022-CN", 11 ) ) {
932 for ( _i = 0; _i < *to_len; _i++ ) {
933 ((unsigned char *)(*to))[_i] &= (unsigned char)0x7f;
942 base64size(const unsigned long len)
944 int b_len = len + (len / 3);
945 b_len += (b_len / 72 * 2) + 4;
950 getEncodingType(const char * body,
951 const unsigned int len,
952 boolean_t strict_mime)
955 // Our goal here is to produce the most readable, safe encoding.
956 // We have a couple of parameters that will guide our
959 // 1) RFC822 allows lines to be a minimum of 1000 characters,
960 // but MIME encourages mailers to keep lines to <76 characters
961 // and use quoted-printable if necessary to achieve this.
963 // 2) The base64 encoding will grow the body size by 33%, and
964 // also render it unreadable by humans. We don't want to use
965 // it unless really necessary.
967 // Given the above 2 rules, we want to scan the body part and
968 // select an encoding. The 3 choices will be decided by:
970 // 1) If the text is 7 bit clean, and all lines are <76 chars,
971 // then no encoding will be applied.
974 // 2) If the text is not 7 bit clean, or there are lines >76 chars,
975 // and the quoted-printable size is less than the base64 size,
976 // then quoted-printable will be done.
978 // 3) If 1 & 2 are not true, then base64 will be applied.
980 // If "strict_mime" is false we will only encode if the message
981 // is not 7 bit clean.
985 int base64_growth = base64size(len) - len;
986 int qprint_growth = 0;
987 boolean_t eight_bit = B_FALSE;
988 boolean_t base64 = B_FALSE;
989 boolean_t encode = B_FALSE;
992 const char * last_nl = body;
995 if (body == NULL || len == 0) {
998 if (strncmp(body, "From ", 5) == 0) {
1002 for (cur = body; cur < (body + len); cur++) {
1003 if (*cur != (*cur & 0x7f)) {
1007 } else if (*cur == '=' || *cur == 0) {
1009 // These characters don't force encoding, but will be
1010 // encoded if we end up encoding.
1016 if ((cur - last_nl) > 76) {
1022 if ((cur != body && (*(cur - 1) == ' ' || *(cur - 1) == '\t'))) {
1027 if ((cur + 6) < (body + len) &&
1028 strncmp((cur + 1), "From ", 5) == 0) {
1036 if (encode && (qprint_growth > base64_growth)) {
1042 /* Deal with buffers that don't end with a new line. */
1044 if ((cur - last_nl) > 76) {
1051 if (!strict_mime && !eight_bit) {
1052 /* If strict_mime is off we only encode if we have 8 bit data */
1054 } else if (encode) {
1055 /* strict_mime is TRUE and we have reason to encode. */
1067 writeContentHeaders(char * hdr_buf,
1070 const char * digest,
1073 char default_charset[64];
1075 strcat(hdr_buf,"Content-Type: ");
1076 strcat(hdr_buf,type);
1079 strcpy(default_charset,"US-ASCII");
1081 getCharSet(default_charset);
1083 strcat(hdr_buf,"; charset=");
1085 strcat(hdr_buf,default_charset);
1089 strcat(hdr_buf,"Content-Transfer-Encoding: ");
1093 strcat(hdr_buf,"7bit\n");
1097 default: /* Assume the worst. */
1098 strcat(hdr_buf,"8bit\n");
1101 strcat(hdr_buf,"quoted-printable\n");
1105 strcat(hdr_buf,"base64\n");
1109 strcat(hdr_buf,"Content-MD5: ");
1110 writeBase64(hdr_buf, digest, 16);
1114 // Base64 Alphabet (65-character subset of US-ASCII as per RFC1521)
1118 static const char base64_chars[] =
1119 {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
1120 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a',
1121 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
1122 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0',
1123 '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
1127 writeBase64(char * buf, const char * bp, const unsigned long len)
1130 /* The length has to be a multiple of 3. We will need to pad
1131 // any extra. Let's just work on the main body and save the
1132 // fractional stuff for the end.
1134 unsigned long main_len = len - (len % 3);
1135 const unsigned char * ubp = (const unsigned char *)bp;
1139 unsigned int enc_char;
1145 if (bp == NULL || len == 0) {
1150 for (block = 0; block < main_len; block += 3) {
1151 enc_char = (ubp[block] >> 2) & 0x3f;
1152 line[lf++] = base64_chars[enc_char];
1154 enc_char = ((ubp[block] & 0x3) << 4) | ((ubp[block+1] >> 4) & 0xf);
1155 line[lf++] = base64_chars[enc_char];
1157 enc_char = ((ubp[block + 1] & 0xf) << 2) | ((ubp[block + 2] >> 6) & 0x3);
1158 line[lf++] = base64_chars[enc_char];
1160 enc_char = ubp[block + 2] & 0x3f;
1161 line[lf++] = base64_chars[enc_char];
1164 strncat(buf,line,lf);
1171 strncat(buf, line,lf);
1174 if (((lf + 4) % 72) == 0) {
1180 enc_char = (ubp[block] >> 2) & 0x3f ;
1181 strncat(buf, &base64_chars[enc_char], 1);
1183 enc_char = ((ubp[block] & 0x3) << 4);
1184 strncat(buf, &base64_chars[enc_char], 1);
1186 strncat(buf,"==", 2);
1190 enc_char = (ubp[block] >> 2) & 0x3f;
1191 strncat(buf,&base64_chars[enc_char], 1);
1193 enc_char = ((ubp[block] & 0x3) << 4) | ((ubp[block+1] >> 4) & 0xf);
1194 strncat(buf,&base64_chars[enc_char], 1);
1196 enc_char = ((ubp[block + 1] & 0xf) << 2);
1197 strncat(buf,&base64_chars[enc_char], 1);
1199 strncat(buf,"=", 1);
1205 writeQPrint(char *buf, const char * bp, const unsigned long bp_len,
1220 if (bp == NULL || bp_len == 0) {
1227 // A line buffer for improving formatting performance. Note that
1228 // QP requires all lines to be < 72 characters plus CRLF. So, a
1229 // fixed size 80 character buffer is safe.
1232 /* There are probably more elegant ways to deal with a message that
1233 // begins with "From ", but we will simply do it this more simplistic
1236 if (strncmp(bp, "From ", 5) == 0) {
1237 memcpy(&line_buf[off], "=46", 3);
1245 /* This loop will apply the encodings, following the rules identified
1246 // in RFC1521 (though not necessarily in the order presented).
1248 for (cur = start; cur < (bp + bp_len); cur++) {
1250 /* Rule #5: Part 1! We will try to break at white space
1251 // if possible, but it may not be possible. In any case,
1252 // we want to force the lines to be less than 76 characters.
1255 line_buf[off++] = '=';
1256 strncat(buf,line_buf, off);
1262 /* Rule #1: Any octet, except those indicating a line break
1263 // according to the newline convention may be represented by
1264 // an = followed by a two digit hexadecimal representation
1265 // of the octet's value. We will represent any non-7bit
1266 // data this way, but let the rest slide. We do wrap "="
1269 if (*cur != (*cur & 0x7f) || *cur == '=') {
1271 sprintf(tmp, "=%02X", (int)(unsigned char)*cur);
1272 memcpy(&line_buf[off], tmp, 3);
1278 * Under ISO-2022-XX codeset, several escape sequence may be in
1279 * From, Subject field. To pass them, writeQPrint() also accept
1280 * such kind of character.
1282 if ( *cur == (char)0x1b ) {
1283 /* Only 0x1b ????? */
1285 sprintf(tmp, "=%02X", (int)(unsigned char)*cur);
1286 memcpy(&line_buf[off], tmp, 3);
1292 /* Rule #2: Octets with decimal values of 33 through 60
1293 // inclusive and 62 through 126, inclusive, MAY be represented
1294 // as the ASCII characters which correspond to those octets.
1296 if ((*cur >= 33 && *cur <= 60) ||
1297 (*cur >= 62 && *cur <= 126)) {
1298 line_buf[off++] = *cur;
1302 /* Rule #5: The q-p encoding REQUIRES that encoded lines be
1303 // no more than 76 characters long. If longer, an equal sign
1304 // as the last character on the line indicates a soft line break.
1306 // This is tricky if you want to leave it reasonably readable
1307 // (why else do this?). We only want to break on white space.
1308 // At each white gap, we need to count forward to the next
1309 // white gap and see if we exceed the 76 character limit.
1310 // We will cheat a few characters to allow us some room
1313 if (*cur == ' ' || *cur == '\t') {
1314 /* Find the end of this clump of white space.
1317 nw < (bp + bp_len) && *nw && *nw != '\n'; nw++) {
1318 if (!isspace(*nw)) {
1323 /* Find the end of the next non-white region.
1326 white < (bp + bp_len) && *white && !isspace(*white);
1331 line_len = (off - last_nl) + (white - cur);
1332 if (line_len > 72) {
1333 /* Need a soft line break. Lets put it after the
1334 // current clump of white space. We will break
1335 // at 72 characters, even if we aren't at the end
1336 // of the white space. This prevents buffer overruns.
1338 for (cp_w = cur; cp_w < nw; cp_w++) {
1339 line_buf[off++] = *cp_w;
1341 line_buf[off++] = '=';
1342 strncat(buf,line_buf, off);
1349 /* There is an edge case that we may have written the last
1350 // white space character in the for loop above. This will
1351 // prevent us from spitting an extra continuation line.
1354 line_buf[off++] = '=';
1355 strncat(buf,line_buf, off);
1361 /* If we created a "From " at the front we need to wrap
1362 // it to protect from parsers.
1364 if ((nw + 5) < (bp + bp_len) && strncmp(nw, "From ", 5) == 0) {
1365 memcpy(&line_buf[off], "=46", 3);
1374 line_buf[off++] = *cur;
1380 /* Rule 3: Octets with values of 9 and 32 MAY be represented
1381 // as ASCII TAB and SPACE but MUST NOT be represented at the
1382 // end of an encoded line. We solve this by encoding the last
1383 // white space before a new line (except a new line) using
1394 if ((prev == ' ' || prev == '\t') && prev != '\n') {
1395 off = off ? off - 1 : off;
1397 sprintf(tmpbuf, "=%02X", *(cur - 1));
1398 memcpy(&line_buf[off], tmpbuf, 3);
1402 strncat(buf,line_buf, off);
1406 if (*(cur - 1) == '\r') {
1407 strncat(buf,cur, 1);
1415 /* We need to munge a line that starts with "From " to
1416 // protect it from parsers. The simplest way is to encode the
1417 // "F" using rule #1.
1419 if ((cur + 5) < (bp + bp_len) && strncmp((cur + 1), "From ", 5) == 0) {
1420 memcpy(&line_buf[off], "=46", 3);
1429 strncat(buf,line_buf, off);
1432 if (*(cur - 1) != '\n') {
1442 return((c & 0x7f) == c && isspace(c));
1446 rfc1522cpy(char * buf, const char * value)
1449 const char * scan_c;
1450 boolean_t eight_bit = B_FALSE;
1452 char *ret_locale = NULL;
1453 char *ret_lang = NULL;
1454 char *ret_codeset = NULL;
1456 char *NewBuf = NULL;
1457 unsigned long _len = 0;
1459 // We are going to encode 8 bit data, one word at a time. This may
1460 // not be the best possible algorithm, but it will get the correct
1461 // information in the header.
1463 for (cur = value; *cur; cur++) {
1464 if (mbisspace(*cur)) {
1465 strncat(buf,cur, 1);
1469 for (scan_c = cur; *scan_c && !mbisspace(*scan_c); scan_c++) {
1470 if (*scan_c != (*scan_c & 0x7f)) {
1475 if (eight_bit == B_FALSE) {
1476 /* Simple! Copy the chars to the output. */
1477 strncat(buf,cur,scan_c - cur);
1482 getCharSet( charset );
1483 /* Convert charset to InterchangeCodeset name. */
1484 DtXlateOpToStdLocale( DtLCX_OPER_MIME,
1490 /* We have a word here. It has 8 bit data, so we will put
1491 // it out as RFC1522 chunk.
1494 if ( !strncasecmp( ret_codeset, "ISO-2022-KR", 11 ) ) {
1496 * According to RFC1557, in the Header Field, we don't use
1497 * ISO-2022-KR encoding char.
1501 ret_locale = ret_lang = _tmp1_ = NULL;
1502 DtXlateOpToStdLocale( DtLCX_OPER_INTERCHANGE_CODESET,
1508 DtXlateStdToOpLocale( DtLCX_OPER_MIME,
1514 strncat(buf,"=?", 2);
1515 strncat(buf,_tmp2_, strlen(_tmp2_));
1516 strncat(buf,"?q?", 3);
1517 } else if ( !strncasecmp( ret_codeset, "ISO-2022-JP", 11 ) ) {
1519 * According to RFC1468, in the Header Field, we should use
1522 strncat(buf,"=?", 2);
1523 strncat(buf,charset, strlen(charset));
1524 strncat(buf,"?b?", 3);
1526 strncat(buf,"=?", 2);
1527 strncat(buf,charset, strlen(charset));
1528 strncat(buf,"?q?", 3);
1532 * According to RFC1557, in the Header Field, we don't use
1533 * ISO-2022-KR encoding char. Also in us-ascii, we don't have to
1536 memset(tmp, '\0', 1024);
1537 if (!( !strncasecmp( ret_codeset, "ISO-2022-KR", 11 ) ||
1538 !strncasecmp( charset, "us-ascii", 8 ) ) )
1539 (void)CvtStr( (char *)NULL, (void *)cur, scan_c - cur,
1540 (void **)&NewBuf, &_len, CURRENT_TO_INTERNET );
1541 if ( ( NewBuf != NULL ) && ( _len != 0 ) ) {
1543 * if ret_codeset == ISO-2022-KR, we don't come here.
1546 * According to RFC1468, we should use B-encoding.
1548 if ( !strncasecmp( ret_codeset, "ISO-2022-JP", 11 ) ) {
1549 writeBase64( tmp, NewBuf, _len );
1551 writeQPrint( tmp, NewBuf, _len,
1552 (!strncasecmp( ret_codeset, "ISO-2022-TW", 11 ) ||
1553 !strncasecmp( ret_codeset, "ISO-2022-CN", 11 ) ));
1556 writeQPrint( tmp, cur, scan_c - cur, 0 );
1558 strncat(buf,tmp,strlen(tmp));