1 static char sccsid[] = "$TOG: RFCMIME.c /main/11 1999/06/30 12:08:55 mgreess $";
3 * COMPONENT_NAME: desktop
23 * OBJECT CODE ONLY SOURCE MATERIALS
37 /* Iconv not defined for linux. Use the EUSCompat stubs instead. */
41 #include <EUSCompat.h>
43 #include <LocaleXlate.h>
45 #ifdef ICONV_INBUF_CONST
46 # define ICONV_INBUF_TYPE const char **
48 # define ICONV_INBUF_TYPE char **
51 #define WORKSIZE 1024*10
53 * The following escape sequence is defined as "To ASCII".
54 * But is it correct regardless of ISO-2022-XX ???
57 static char ToASCII[ToASCII_NUM] = { 0x1b, 0x28, 0x42 };
60 * _i18nwork1[] is used to convert the passed string with CD iconv.
64 static void *_i18nwork1 = NULL;
65 static unsigned long _i18nsize1 = 0;
66 static int shouldAlloc1 = ~0;
69 * _i18nwork2[] is used to convert the passed string with CD iconv.
73 static void *_i18nwork2 = NULL;
74 static unsigned long _i18nsize2 = 0;
75 static int shouldAlloc2 = ~0;
78 * Forward declarations
80 extern void writeBase64(char * buf, const char * bp, const unsigned long len);
82 static const char *DfltStdCharset = "us-ascii";
83 static const char *DfltStdLang = "C";
85 static void crlf(char *buf)
90 /******************************************************************************
91 * Function: int DtXlateOpToStdLocale (char *operation, char *opLocale,
92 * char **ret_stdLocale, char **ret_stdLang, char **ret_stdSet)
95 * operation Operation associated with the locale value
96 * opLocale An operation-specific locale string
97 * ret_stdLocale Returns the std locale
98 * Caller must free this string.
99 * ret_stdLang Returns the std language & territory string.
100 * Caller must free this string.
101 * ret_stdSet Returns the std code set string.
102 * Caller must free this string.
106 * Purpose: Gets the standard locale given an operation and its locale
108 *****************************************************************************/
110 DtXlateOpToStdLocale (
113 char **ret_stdLocale,
117 _DtXlateDb MyDb = NULL;
118 char MyPlatform[_DtPLATFORM_MAX_LEN + 1];
122 if (_DtLcxOpenAllDbs(&MyDb) == 0 &&
123 _DtXlateGetXlateEnv(MyDb,MyPlatform,&ExecVer,&CompVer) != 0)
125 _DtLcxCloseDb(&MyDb);
132 (void) _DtLcxXlateOpToStd(MyDb, MyPlatform, ExecVer,
134 ret_stdLocale, ret_stdLang, ret_stdSet, NULL);
137 /* if failed, give default values */
138 if (ret_stdLocale != NULL && *ret_stdLocale == NULL)
140 *ret_stdLocale = (char *)malloc(strlen(DfltStdLang)+strlen(DfltStdCharset)+3);
141 sprintf(*ret_stdLocale,"%s.%s",DfltStdLang,DfltStdCharset);
143 if (ret_stdLang != NULL && *ret_stdLang == NULL)
144 *ret_stdLang = (char *)strdup(DfltStdLang);
145 if (ret_stdSet != NULL && *ret_stdSet == NULL)
146 *ret_stdSet = (char *)strdup(DfltStdCharset);
149 /******************************************************************************
150 * Function: int DtXlateStdToOpLocale ( char *operation, char *stdLocale,
151 * char *stdLang, char *stdCodeSet,
152 * char *dflt_opLocale, char **ret_opLocale)
155 * operation operation whose locale value will be retrieved
156 * stdLocale standard locale value
157 * stdLang standard Lang/Territory Value
158 * stdCodeSet standard CodeSet Value
159 * dflt_opLocale operation-specific locale-value
160 * This is the default value used in error case
161 * ret_opLocale operation-specific locale-value placed here
162 * Caller must free this string.
166 * Purpose: Gets an operation-specific locale string given the standard string
168 *****************************************************************************/
170 DtXlateStdToOpLocale (
178 _DtXlateDb MyDb = NULL;
179 char MyPlatform[_DtPLATFORM_MAX_LEN + 1];
183 if (_DtLcxOpenAllDbs(&MyDb) == 0 &&
184 _DtXlateGetXlateEnv(MyDb,MyPlatform,&ExecVer,&CompVer) != 0)
186 _DtLcxCloseDb(&MyDb);
193 (void) _DtLcxXlateStdToOp(MyDb, MyPlatform, ExecVer,
194 operation, stdLocale, stdLang, stdCodeSet, NULL,
198 /* if translation fails, use a default value */
199 if (ret_opLocale && *ret_opLocale == NULL)
201 if (dflt_opLocale) *ret_opLocale = (char *)strdup(dflt_opLocale);
202 else if (stdLocale) *ret_opLocale = (char *)strdup(stdLocale);
208 char *ret_locale = NULL;
209 char *ret_lang = NULL;
210 char *ret_codeset = NULL;
211 char *ret_target = NULL;
213 DtXlateOpToStdLocale(DtLCX_OPER_SETLOCALE,
214 setlocale(LC_CTYPE, NULL),
218 DtXlateStdToOpLocale(DtLCX_OPER_INTERCHANGE_CODESET,
224 DtXlateStdToOpLocale(DtLCX_OPER_MIME,
235 getCharSet(char * charset)
239 mimeCS = targetTagName();
242 strcpy(charset, mimeCS);
244 strcpy(charset, "us-ascii"); /* default MIME codeset */
250 md5PlainText(const char * bp, const unsigned long len, unsigned char * digest)
252 /* We need to compute the md5 signature based on a message that has
253 // the CRLF line terminator. Most of our buffers don't so we will need
254 // to scan the body and do some magic. The approach will be to sum
255 // one line at a time. If the buffer doesn't have CRLF we will do that
260 unsigned char * local_crlf = (unsigned char *)"\r\n";
261 const char * last = bp;
265 for (cur = bp; cur < (bp + len); cur++) {
267 if (cur == bp || *(cur - 1) == '\r') {
268 MD5Update(&context, (unsigned char *)last,
272 MD5Update(&context, (unsigned char *)last,
274 MD5Update(&context, local_crlf, 2);
280 if (bp[len - 1] != '\n') {
281 /* Need to sum the trailing fraction with a CRLF. */
282 MD5Update(&context, (unsigned char *)last,
284 MD5Update(&context, local_crlf, 2);
287 MD5Final(digest, &context);
291 static void _converter_( iconv_t CD,
292 void *from, unsigned long from_len,
293 void **to, unsigned long *to_len )
298 size_t OutBytesLeft = 0;
299 size_t _OutBytesLeft = 0;
301 size_t converted_num = 0;
307 if ( shouldAlloc1 ) {
308 /* Obtain work area */
309 _i18nwork1 = (size_t *)malloc( WORKSIZE );
314 _i18nsize1 = WORKSIZE;
318 InBuf = (char *)from;
319 InBytesLeft = from_len;
320 OutBytesLeft = _i18nsize1;
321 OutBuf = (char *)_i18nwork1;
324 * Need to place iconv state to the initial one by
325 * setting inbuf to NULL of iconv().
327 iconv( CD, (ICONV_INBUF_TYPE)NULL, 0, NULL, 0 );
332 * +----------------------------+
334 * +----------------------------+
335 * <-------------------------->
344 * +----------------------------+
346 * +----------------------------+
347 * <-------------------------->
351 iconv_ret = iconv( CD, (ICONV_INBUF_TYPE)&InBuf, &InBytesLeft,
352 &OutBuf, &OutBytesLeft );
353 if ( iconv_ret == 0 ) {
357 * +----------------------------+
358 * |XXXXXXXXXXXXXXXXXXXXXXXXXXXX|
359 * +----------------------------+
366 * +----------------------------+
367 * |XXXXXXXXXXXXXXXXX| | | |
368 * +----------------------------+
369 * <---------------> <-------->
370 * converted_num OutBytesLeft
372 converted_num = (unsigned long)((char *)OutBuf-(char *)_i18nwork1);
373 *to = (void *)_i18nwork1;
374 *to_len = (unsigned long)converted_num;
377 if ( errno == E2BIG ) {
378 /* Overflow. Data is still left.
381 * +----------------------------+
382 * |XXXXXXXXXXXXXX| | | |
383 * +----------------------------+
390 * +----------------------------+
391 * |XXXXXXXXXXXXXXXXXXXXXXXXXXX |
392 * +----------------------------+
393 * <------------------------->
394 * converted_num OutBytesLeft=?
398 /* Check how many bytes have been converted already.
401 (unsigned long)((char *)OutBuf - (char *)_i18nwork1);
402 _i18nsize1 += WORKSIZE;
403 _p = realloc( _i18nwork1, _i18nsize1 );
414 OutBuf = (char *)((char*)_i18nwork1 + converted_num);
415 OutBytesLeft += WORKSIZE;
428 CvtStr( char *charSet, void *from, unsigned long from_len,
429 void **to, unsigned long *to_len, Direction dir )
431 char *ret_locale = NULL;
432 char *ret_lang = NULL;
433 char *ret_codeset = NULL;
434 char *from_codeset = NULL;
435 char *to_codeset = NULL;
436 char *CuStdCodeSet = NULL;
437 char *InterChCodeSet = NULL;
438 char *StdCodeSet = NULL;
441 int isStopASCII = ~0;
442 unsigned long converted_num = 0;
444 /* Get CuStdCodeSet */
445 DtXlateOpToStdLocale( DtLCX_OPER_SETLOCALE,
446 setlocale( LC_CTYPE, NULL ),
452 * If charSet is NULL, it means the passed string's charset in *from is
453 * unknown by dtmail. In this case, this converter assumes that
454 * when dir = CURRENT_TO_INTERNET,
455 * *from's encoding is the current locale's one.
456 * when dir = INTERNET_TO_CURRENT,
457 * *from's encoding is the current locale's Internet Message's one.
460 * dtmail is running under ja_JP locale.
461 * dir : CURRENT_TO_INTERNET
464 * dir : INTERNET_TO_CURRENT
465 * *from = ISO-2022-JP
469 * ISO-2022-JP can be converted to either EUC-JP or IBM-932 practically.
470 * But the current AIX.lcx says
471 * StdCodeSet InterchangeCodeset
472 * EUC-JP <--> ISO-2022-JP
473 * IBM-932 ---> ISO-2022-JP
474 * HP-SJIS ---> ISO-2022-JP
475 * HP-KANA8 ---> ISO-2022-JP
476 * therefore DtXlateOpToStdLocale() can convert ISO-2022-JP to EUC-JP only.
477 * To fix this, we hard-coded this situation with the CDE Standard Name
479 * ???? Is it correct ???
481 if ( dir == INTERNET_TO_CURRENT ) {
483 * As for from_codeset
485 if ( ( charSet == NULL ) || ( *charSet == '\0' ) ) {
486 /* Convert CuStdCodeSet to StdInterChangeCodeSet */
487 DtXlateStdToOpLocale( DtLCX_OPER_INTERCHANGE_CODESET,
494 /* Convert charSet to StdInterChangeCodeSet */
495 ret_locale = ret_lang = ret_codeset = NULL;
496 DtXlateOpToStdLocale( DtLCX_OPER_MIME,
503 /* Convert StdInterChangeCodeSet to OpICONV3 codeset */
504 DtXlateStdToOpLocale( DtLCX_OPER_ICONV3,
513 if ( ( charSet == NULL ) || ( *charSet == '\0' ) ) {
514 /* Convert CuStdCodeSet to OpICONV3 codeset */
515 DtXlateStdToOpLocale( DtLCX_OPER_ICONV3,
523 if ( (!strncasecmp(InterChCodeSet,"ISO-2022-JP",11) &&
524 !strncasecmp(CuStdCodeSet,"IBM-932",7) ) ||
525 (!strncasecmp(InterChCodeSet,"ISO-2022-JP",11) &&
526 !strncasecmp(CuStdCodeSet,"EUC-JP",6) ) ) {
527 ret_codeset = CuStdCodeSet;
531 /* Convert InterChCodeSet to StdCodeSet */
532 ret_locale = ret_lang = ret_codeset = NULL;
533 DtXlateOpToStdLocale( DtLCX_OPER_INTERCHANGE_CODESET,
539 DtXlateStdToOpLocale( DtLCX_OPER_ICONV3,
546 } else { /* dir == CURRENT_TO_INTERNET */
548 * As for from_codeset
550 if ( ( charSet == NULL ) || ( *charSet == '\0' ) ) {
551 /* Convert CuStdCodeSet to OpICONV3 codeset */
552 DtXlateStdToOpLocale( DtLCX_OPER_ICONV3,
559 /* Convert charSet to StdInterChangeCodeSet */
560 ret_locale = ret_lang = ret_codeset = NULL;
561 DtXlateOpToStdLocale( DtLCX_OPER_MIME,
566 /* Convert StdInterChangeCodeSet to OpICONV3 codeset */
567 DtXlateStdToOpLocale( DtLCX_OPER_ICONV3,
577 if ( ( charSet == NULL ) || ( *charSet == '\0' ) ) {
578 /* Convert CuStdCodeSet to StdInterChangeCodeSet */
579 DtXlateStdToOpLocale( DtLCX_OPER_INTERCHANGE_CODESET,
586 /* Convert charSet to StdInterChangeCodeSet */
587 ret_locale = ret_lang = ret_codeset = NULL;
588 DtXlateOpToStdLocale( DtLCX_OPER_MIME,
595 /* Convert StdInterChangeCodeSet to OpICONV3 codeset */
596 DtXlateStdToOpLocale( DtLCX_OPER_ICONV3,
607 if ( shouldAlloc2 ) {
608 /* Obtain work area */
609 _i18nwork2 = (size_t *)malloc( WORKSIZE );
614 _i18nsize2 = WORKSIZE;
618 if (NULL == to_codeset || NULL == from_codeset)
621 if ( ( CD = iconv_open( to_codeset, from_codeset ) ) != (iconv_t)-1 ) {
623 * According to several RFCs( 822, 1468, 1557, ..... )
624 * the escape sequence to switch to ASCII is needed just before
625 * '\n'. IBM-eucJP/IBM-932 <--> fold 7 does while the other doesn't.
626 * Therefore CvtStr() does take care of this here.
628 if ( dir == INTERNET_TO_CURRENT ) {
629 _converter_( CD, from, from_len, to, to_len );
631 void *new_from = from;
632 unsigned long new_from_len = from_len;
633 unsigned long _passed = 0;
636 unsigned long _tmp_len = 0;
638 while ( _passed < from_len ) {
642 for ( ; _passed < from_len; _passed += clen ) {
643 clen = mblen(&(((char *)from)[_passed]), MB_CUR_MAX);
647 if ( ( clen > 1 ) || !isascii( ((char*)from)[_passed] ) ){
648 /* Here, maybe MB or non-ASCII */
652 if ( ( ((char*)from)[_passed] != '\n' ) &&
653 ( ((char*)from)[_passed] != '\0' ) ) {
658 if ( ((char*)from)[_passed] == '\n' ||
659 ((char*)from)[_passed] == '\0' )
662 new_from_len = &(((char *)from)[_passed])-(char *)new_from;
663 if ( ( _passed < from_len ) && ( clen == 1 ) &&
664 ( ((char*)from)[_passed] == '\n' ) ) {
669 * new_from from[_passed]
671 * +------------------------+--+------.................+
673 * +------------------------+--+------.................+
674 * <-------------------------> $
675 * new_from_len next new_from
676 * <------------------------------------------------->
682 * ********** DO 1 LINE CONVERSION **********
684 _tmp = NULL; _tmp_len = 0;
685 _converter_( CD, new_from, new_from_len, &_tmp, &_tmp_len );
687 if ( ( _tmp == NULL ) && ( _tmp_len == 0 ) ) {
688 /* Conversion fail */
697 * +-----------------------+ +-------------+
699 * +-----------------------+ +-------------+
702 * <---------------------> <----------->
705 /* Append _tmp to target */
706 if ( converted_num + _tmp_len > _i18nsize2 ) {
707 /* Need more memory. */
710 _i18nsize2 += WORKSIZE;
711 _i18n = realloc( _i18nwork2, _i18nsize2 );
727 * +---------------------------+
728 * |XXXXXXXXXXX(COPIED)XX |
729 * +---------------------------+
730 * <---------><-------->
731 * (old)conv. _tmp_len
732 * <------------------->
734 * <-------------------------->
737 strncpy( (char *)_i18nwork2 + converted_num,
738 (char *)_tmp, _tmp_len );
739 converted_num += _tmp_len;
741 *to = (void *)_i18nwork2;
742 *to_len = converted_num;
743 new_from = &(((char *)from)[_passed]) + 1;
747 * According to RFC1468, if the line ends with a non-ASCII
748 * char and does not switch to ASCII before the end of the line,
749 * we must switch to ASCII just before the end of the line.
753 * +---------------------------+ +------+
754 * |XXXXXXXXXXXXXXXXXXXXX | + |1b2842|
755 * +---------------------------+ +------+
756 * <-------------------> <---->
757 * converted_num ToASCII_NUM
758 * <------------------------->
764 if ( !isStopASCII ) {
765 if ( (((char *)_i18nwork2)[converted_num-1] == '\n') ||
766 (((char *)_i18nwork2)[converted_num-1] == '\0') ) {
768 char _p = ((char *)_i18nwork2)[converted_num-1];
770 if (!((converted_num >=3+1 ) &&
771 !memcmp((void *)((char *)_i18nwork2+converted_num-3-1),
772 ToASCII, ToASCII_NUM )) ){
773 /* if not ToASCII there, then */
776 if ( converted_num + ToASCII_NUM > _i18nsize2 ) {
777 /* Need more memory. */
780 _i18nsize2 += WORKSIZE;
781 _i18n=realloc(_i18nwork2,_i18nsize2);
791 strncpy( (char *)_i18nwork2+converted_num-1,
792 ToASCII, ToASCII_NUM );
793 converted_num += ToASCII_NUM;
794 ((char *)_i18nwork2)[converted_num-1] = _p;
796 *to_len = converted_num;
800 if (!((converted_num >=3 ) &&
801 !memcmp((void *)((char *)_i18nwork2+converted_num-3),
802 ToASCII, ToASCII_NUM )) ){
804 // if not ToASCII there, then
807 if ( converted_num + ToASCII_NUM > _i18nsize2 ) {
810 _i18nsize2 += WORKSIZE;
811 _i18n=realloc(_i18nwork2,_i18nsize2);
821 strncpy( (char *)_i18nwork2+converted_num,
822 ToASCII, ToASCII_NUM );
823 converted_num += ToASCII_NUM;
825 *to_len = converted_num;
834 if( ( *to != NULL ) && ( *to_len != 0 ) ) {
835 if ( !isStopASCII ) {
836 if ( (((char *)_i18nwork2)[converted_num-1] == '\n') ||
837 (((char *)_i18nwork2)[converted_num-1] == '\0') ) {
839 char _p = ((char *)_i18nwork2)[converted_num-1];
841 if (!((converted_num >=3+1 ) &&
842 !memcmp((void *)((char *)_i18nwork2+converted_num-3-1),
843 ToASCII, ToASCII_NUM )) ){
844 /* if not ToASCII there, then
848 if ( converted_num + ToASCII_NUM > _i18nsize2 ) {
851 _i18nsize2 += WORKSIZE;
852 _i18n=realloc(_i18nwork2,_i18nsize2);
861 strncpy( (char *)_i18nwork2+converted_num-1,
862 ToASCII, ToASCII_NUM );
863 converted_num += ToASCII_NUM;
864 ((char *)_i18nwork2)[converted_num-1] = _p;
866 *to_len = converted_num;
870 if (!((converted_num >=3 ) &&
871 !memcmp((void *)((char *)_i18nwork2+converted_num-3),
872 ToASCII, ToASCII_NUM )) ){
873 /* if not ToASCII there, then
877 if ( converted_num + ToASCII_NUM > _i18nsize2 ) {
880 _i18nsize2 += WORKSIZE;
881 _i18n=realloc(_i18nwork2,_i18nsize2);
890 strncpy( (char *)_i18nwork2+converted_num,
891 ToASCII, ToASCII_NUM );
892 converted_num += ToASCII_NUM;
894 *to_len = converted_num;
901 * If InterChCodeSet is either ISO-2022-JP/ISO-2022-KR/ISO-2022-TW
902 * ISO-2022-CN, strip MSB here since iconv'ed UDC has MSB set to 1
904 if ( !strncasecmp( InterChCodeSet, "ISO-2022-JP", 11 ) ||
905 !strncasecmp( InterChCodeSet, "ISO-2022-KR", 11 ) ||
906 !strncasecmp( InterChCodeSet, "ISO-2022-TW", 11 ) ||
907 !strncasecmp( InterChCodeSet, "ISO-2022-CN", 11 ) ) {
910 for ( _i = 0; _i < *to_len; _i++ ) {
911 ((unsigned char *)(*to))[_i] &= (unsigned char)0x7f;
920 base64size(const unsigned long len)
922 int b_len = len + (len / 3);
923 b_len += (b_len / 72 * 2) + 4;
928 getEncodingType(const char * body,
929 const unsigned int len,
930 boolean_t strict_mime)
933 // Our goal here is to produce the most readable, safe encoding.
934 // We have a couple of parameters that will guide our
937 // 1) RFC822 allows lines to be a minimum of 1000 characters,
938 // but MIME encourages mailers to keep lines to <76 characters
939 // and use quoted-printable if necessary to achieve this.
941 // 2) The base64 encoding will grow the body size by 33%, and
942 // also render it unreadable by humans. We don't want to use
943 // it unless really necessary.
945 // Given the above 2 rules, we want to scan the body part and
946 // select an encoding. The 3 choices will be decided by:
948 // 1) If the text is 7 bit clean, and all lines are <76 chars,
949 // then no encoding will be applied.
952 // 2) If the text is not 7 bit clean, or there are lines >76 chars,
953 // and the quoted-printable size is less than the base64 size,
954 // then quoted-printable will be done.
956 // 3) If 1 & 2 are not true, then base64 will be applied.
958 // If "strict_mime" is false we will only encode if the message
959 // is not 7 bit clean.
963 int base64_growth = base64size(len) - len;
964 int qprint_growth = 0;
965 boolean_t eight_bit = B_FALSE;
966 boolean_t base64 = B_FALSE;
967 boolean_t encode = B_FALSE;
970 const char * last_nl = body;
973 if (body == NULL || len == 0) {
976 if (strncmp(body, "From ", 5) == 0) {
980 for (cur = body; cur < (body + len); cur++) {
981 if (*cur != (*cur & 0x7f)) {
985 } else if (*cur == '=' || *cur == 0) {
987 // These characters don't force encoding, but will be
988 // encoded if we end up encoding.
994 if ((cur - last_nl) > 76) {
1000 if ((cur != body && (*(cur - 1) == ' ' || *(cur - 1) == '\t'))) {
1005 if ((cur + 6) < (body + len) &&
1006 strncmp((cur + 1), "From ", 5) == 0) {
1014 if (encode && (qprint_growth > base64_growth)) {
1020 /* Deal with buffers that don't end with a new line. */
1022 if ((cur - last_nl) > 76) {
1029 if (!strict_mime && !eight_bit) {
1030 /* If strict_mime is off we only encode if we have 8 bit data */
1032 } else if (encode) {
1033 /* strict_mime is TRUE and we have reason to encode. */
1045 writeContentHeaders(char * hdr_buf,
1048 const char * digest,
1051 char default_charset[64];
1053 strcat(hdr_buf,"Content-Type: ");
1054 strcat(hdr_buf,type);
1057 strcpy(default_charset,"US-ASCII");
1059 getCharSet(default_charset);
1061 strcat(hdr_buf,"; charset=");
1063 strcat(hdr_buf,default_charset);
1067 strcat(hdr_buf,"Content-Transfer-Encoding: ");
1071 strcat(hdr_buf,"7bit\n");
1075 default: /* Assume the worst. */
1076 strcat(hdr_buf,"8bit\n");
1079 strcat(hdr_buf,"quoted-printable\n");
1083 strcat(hdr_buf,"base64\n");
1087 strcat(hdr_buf,"Content-MD5: ");
1088 writeBase64(hdr_buf, digest, 16);
1092 // Base64 Alphabet (65-character subset of US-ASCII as per RFC1521)
1096 static const char base64_chars[] =
1097 {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
1098 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a',
1099 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
1100 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0',
1101 '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
1105 writeBase64(char * buf, const char * bp, const unsigned long len)
1108 /* The length has to be a multiple of 3. We will need to pad
1109 // any extra. Let's just work on the main body and save the
1110 // fractional stuff for the end.
1112 unsigned long main_len = len - (len % 3);
1113 const unsigned char * ubp = (const unsigned char *)bp;
1117 unsigned int enc_char;
1123 if (bp == NULL || len == 0) {
1128 for (block = 0; block < main_len; block += 3) {
1129 enc_char = (ubp[block] >> 2) & 0x3f;
1130 line[lf++] = base64_chars[enc_char];
1132 enc_char = ((ubp[block] & 0x3) << 4) | ((ubp[block+1] >> 4) & 0xf);
1133 line[lf++] = base64_chars[enc_char];
1135 enc_char = ((ubp[block + 1] & 0xf) << 2) | ((ubp[block + 2] >> 6) & 0x3);
1136 line[lf++] = base64_chars[enc_char];
1138 enc_char = ubp[block + 2] & 0x3f;
1139 line[lf++] = base64_chars[enc_char];
1142 strncat(buf,line,lf);
1149 strncat(buf, line,lf);
1152 if (((lf + 4) % 72) == 0) {
1158 enc_char = (ubp[block] >> 2) & 0x3f ;
1159 strncat(buf, &base64_chars[enc_char], 1);
1161 enc_char = ((ubp[block] & 0x3) << 4);
1162 strncat(buf, &base64_chars[enc_char], 1);
1164 strncat(buf,"==", 2);
1168 enc_char = (ubp[block] >> 2) & 0x3f;
1169 strncat(buf,&base64_chars[enc_char], 1);
1171 enc_char = ((ubp[block] & 0x3) << 4) | ((ubp[block+1] >> 4) & 0xf);
1172 strncat(buf,&base64_chars[enc_char], 1);
1174 enc_char = ((ubp[block + 1] & 0xf) << 2);
1175 strncat(buf,&base64_chars[enc_char], 1);
1177 strncat(buf,"=", 1);
1183 writeQPrint(char *buf, const char * bp, const unsigned long bp_len,
1198 if (bp == NULL || bp_len == 0) {
1205 // A line buffer for improving formatting performance. Note that
1206 // QP requires all lines to be < 72 characters plus CRLF. So, a
1207 // fixed size 80 character buffer is safe.
1210 /* There are probably more elegant ways to deal with a message that
1211 // begins with "From ", but we will simply do it this more simplistic
1214 if (strncmp(bp, "From ", 5) == 0) {
1215 memcpy(&line_buf[off], "=46", 3);
1223 /* This loop will apply the encodings, following the rules identified
1224 // in RFC1521 (though not necessarily in the order presented).
1226 for (cur = start; cur < (bp + bp_len); cur++) {
1228 /* Rule #5: Part 1! We will try to break at white space
1229 // if possible, but it may not be possible. In any case,
1230 // we want to force the lines to be less than 76 characters.
1233 line_buf[off++] = '=';
1234 strncat(buf,line_buf, off);
1240 /* Rule #1: Any octet, except those indicating a line break
1241 // according to the newline convention may be represented by
1242 // an = followed by a two digit hexadecimal representation
1243 // of the octet's value. We will represent any non-7bit
1244 // data this way, but let the rest slide. We do wrap "="
1247 if (*cur != (*cur & 0x7f) || *cur == '=') {
1249 sprintf(tmp, "=%02X", (int)(unsigned char)*cur);
1250 memcpy(&line_buf[off], tmp, 3);
1256 * Under ISO-2022-XX codeset, several escape sequence may be in
1257 * From, Subject field. To pass them, writeQPrint() also accept
1258 * such kind of character.
1260 if ( *cur == (char)0x1b ) {
1261 /* Only 0x1b ????? */
1263 sprintf(tmp, "=%02X", (int)(unsigned char)*cur);
1264 memcpy(&line_buf[off], tmp, 3);
1270 /* Rule #2: Octets with decimal values of 33 through 60
1271 // inclusive and 62 through 126, inclusive, MAY be represented
1272 // as the ASCII characters which correspond to those octets.
1274 if ((*cur >= 33 && *cur <= 60) ||
1275 (*cur >= 62 && *cur <= 126)) {
1276 line_buf[off++] = *cur;
1280 /* Rule #5: The q-p encoding REQUIRES that encoded lines be
1281 // no more than 76 characters long. If longer, an equal sign
1282 // as the last character on the line indicates a soft line break.
1284 // This is tricky if you want to leave it reasonably readable
1285 // (why else do this?). We only want to break on white space.
1286 // At each white gap, we need to count forward to the next
1287 // white gap and see if we exceed the 76 character limit.
1288 // We will cheat a few characters to allow us some room
1291 if (*cur == ' ' || *cur == '\t') {
1292 /* Find the end of this clump of white space.
1295 nw < (bp + bp_len) && *nw && *nw != '\n'; nw++) {
1296 if (!isspace(*nw)) {
1301 /* Find the end of the next non-white region.
1304 white < (bp + bp_len) && *white && !isspace(*white);
1309 line_len = (off - last_nl) + (white - cur);
1310 if (line_len > 72) {
1311 /* Need a soft line break. Lets put it after the
1312 // current clump of white space. We will break
1313 // at 72 characters, even if we aren't at the end
1314 // of the white space. This prevents buffer overruns.
1316 for (cp_w = cur; cp_w < nw; cp_w++) {
1317 line_buf[off++] = *cp_w;
1319 line_buf[off++] = '=';
1320 strncat(buf,line_buf, off);
1327 /* There is an edge case that we may have written the last
1328 // white space character in the for loop above. This will
1329 // prevent us from spitting an extra continuation line.
1332 line_buf[off++] = '=';
1333 strncat(buf,line_buf, off);
1339 /* If we created a "From " at the front we need to wrap
1340 // it to protect from parsers.
1342 if ((nw + 5) < (bp + bp_len) && strncmp(nw, "From ", 5) == 0) {
1343 memcpy(&line_buf[off], "=46", 3);
1352 line_buf[off++] = *cur;
1358 /* Rule 3: Octets with values of 9 and 32 MAY be represented
1359 // as ASCII TAB and SPACE but MUST NOT be represented at the
1360 // end of an encoded line. We solve this by encoding the last
1361 // white space before a new line (except a new line) using
1372 if ((prev == ' ' || prev == '\t') && prev != '\n') {
1373 off = off ? off - 1 : off;
1375 sprintf(tmpbuf, "=%02X", *(cur - 1));
1376 memcpy(&line_buf[off], tmpbuf, 3);
1380 strncat(buf,line_buf, off);
1384 if (*(cur - 1) == '\r') {
1385 strncat(buf,cur, 1);
1393 /* We need to munge a line that starts with "From " to
1394 // protect it from parsers. The simplest way is to encode the
1395 // "F" using rule #1.
1397 if ((cur + 5) < (bp + bp_len) && strncmp((cur + 1), "From ", 5) == 0) {
1398 memcpy(&line_buf[off], "=46", 3);
1407 strncat(buf,line_buf, off);
1410 if (*(cur - 1) != '\n') {
1420 return((c & 0x7f) == c && isspace(c));
1424 rfc1522cpy(char * buf, const char * value)
1427 const char * scan_c;
1428 boolean_t eight_bit = B_FALSE;
1430 char *ret_locale = NULL;
1431 char *ret_lang = NULL;
1432 char *ret_codeset = NULL;
1434 char *NewBuf = NULL;
1435 unsigned long _len = 0;
1437 // We are going to encode 8 bit data, one word at a time. This may
1438 // not be the best possible algorithm, but it will get the correct
1439 // information in the header.
1441 for (cur = value; *cur; cur++) {
1442 if (mbisspace(*cur)) {
1443 strncat(buf,cur, 1);
1447 for (scan_c = cur; *scan_c && !mbisspace(*scan_c); scan_c++) {
1448 if (*scan_c != (*scan_c & 0x7f)) {
1453 if (eight_bit == B_FALSE) {
1454 /* Simple! Copy the chars to the output. */
1455 strncat(buf,cur,scan_c - cur);
1460 getCharSet( charset );
1461 /* Convert default_charset to InterchangeCodeset name. */
1462 DtXlateOpToStdLocale( DtLCX_OPER_MIME,
1468 /* We have a word here. It has 8 bit data, so we will put
1469 // it out as RFC1522 chunk.
1472 if ( !strncasecmp( ret_codeset, "ISO-2022-KR", 11 ) ) {
1474 * According to RFC1557, in the Header Field, we don't use
1475 * ISO-2022-KR encoding char.
1479 ret_locale = ret_lang = _tmp1_ = NULL;
1480 DtXlateOpToStdLocale( DtLCX_OPER_INTERCHANGE_CODESET,
1486 DtXlateStdToOpLocale( DtLCX_OPER_MIME,
1492 strncat(buf,"=?", 2);
1493 strncat(buf,_tmp2_, strlen(_tmp2_));
1494 strncat(buf,"?q?", 3);
1495 } else if ( !strncasecmp( ret_codeset, "ISO-2022-JP", 11 ) ) {
1497 * According to RFC1468, in the Header Field, we should use
1500 strncat(buf,"=?", 2);
1501 strncat(buf,charset, strlen(charset));
1502 strncat(buf,"?b?", 3);
1504 strncat(buf,"=?", 2);
1505 strncat(buf,charset, strlen(charset));
1506 strncat(buf,"?q?", 3);
1510 * According to RFC1557, in the Header Field, we don't use
1511 * ISO-2022-KR encoding char. Also in us-ascii, we don't have to
1514 memset(tmp, '\0', 1024);
1515 if (!( !strncasecmp( ret_codeset, "ISO-2022-KR", 11 ) ||
1516 !strncasecmp( charset, "us-ascii", 8 ) ) )
1517 (void)CvtStr( (char *)NULL, (void *)cur, scan_c - cur,
1518 (void **)&NewBuf, &_len, CURRENT_TO_INTERNET );
1519 if ( ( NewBuf != NULL ) && ( _len != 0 ) ) {
1521 * if ret_codeset == ISO-2022-KR, we don't come here.
1524 * According to RFC1468, we should use B-encoding.
1526 if ( !strncasecmp( ret_codeset, "ISO-2022-JP", 11 ) ) {
1527 writeBase64( tmp, NewBuf, _len );
1529 writeQPrint( tmp, NewBuf, _len,
1530 (!strncasecmp( ret_codeset, "ISO-2022-TW", 11 ) ||
1531 !strncasecmp( ret_codeset, "ISO-2022-CN", 11 ) ));
1534 writeQPrint( tmp, cur, scan_c - cur, 0 );
1536 strncat(buf,tmp,strlen(tmp));