2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
27 * $TOG: RFCMailValues.C /main/10 1998/09/03 07:01:06 mgreess $
29 * RESTRICTED CONFIDENTIAL INFORMATION:
31 * The information in this document is subject to special
32 * restrictions in a confidential disclosure agreement between
33 * HP, IBM, Sun, USL, SCO and Univel. Do not distribute this
34 * document outside HP, IBM, Sun, USL, SCO, or Univel without
35 * Sun's specific written approval. This document and all copies
36 * and derivative works thereof must be returned or destroyed at
39 * Copyright 1993 Sun Microsystems, Inc. All rights reserved.
44 #ifndef I_HAVE_NO_IDENT
47 #include <EUSCompat.h>
54 #include <DtMail/IO.hh>
57 #include "str_utils.h"
59 // String values. These assume an RFC format for now. They will
60 // apply RFC1522 coding rules to the strings for dealing with
61 // non-ASCII text in RFC headers.
65 RFCValue::RFCValue(const char * str, int size) : DtMailValue(NULL)
67 _value = (char *)malloc(size + 1);
68 memcpy(_value, str, size);
75 RFCValue::RFCValue(const char * str, int size, DtMail::Session *s) : DtMailValue(NULL)
77 _value = (char *)malloc(size + 1);
78 memcpy(_value, str, size);
85 RFCValue::~RFCValue(void)
93 decode1522(const char * enc_start, const char * max_end, char **output, DtMail::Session *s)
95 // Find the end of the encoded region.
99 for (enc_end = enc_start;
100 *enc_end && enc_end < max_end;
103 if (*enc_end == '?') {
105 if (qs > 3 && *(enc_end + 1) == '=') {
111 if (*enc_end != '?') {
117 // Pull off the char set name.
120 for (cs_end = enc_start + 2; *cs_end != '?'; cs_end++) {
124 int cs_name_length = cs_end - enc_start - 2;
125 char *cs_name = (char*) malloc(cs_name_length + 1);
127 strncpy(cs_name, enc_start + 2, cs_name_length);
128 cs_name[cs_name_length] = 0;
130 // Set the encoding method and start of buffer.
132 char encoding = *(cs_end + 1);
133 const char * buf_start = cs_end + 3;
135 switch (toupper(encoding)) {
139 RFCMIME::readQPrint(*output, len, buf_start, enc_end - buf_start - 1);
147 RFCMIME::readBase64(*output, len, buf_start, enc_end - buf_start - 1);
153 // Invalid encoding. Assume a false match.
158 // Do codeset conversion if charset is present
159 char *from_cs = s->csToConvName(cs_name);
160 char *to_cs = s->locToConvName();
161 if ( from_cs && to_cs ) {
162 if ( strcasecmp(from_cs, to_cs) != 0 ) {
163 unsigned long tmplen = (unsigned long) strlen(*output);
164 (void) s->csConvert(&(*output), tmplen, 1, from_cs, to_cs);
178 RFCValue::operator const char *(void)
190 RFCValue::operator= (const char * str)
201 _value = strdup(str);
206 static const char * DaysOfTheWeek[] = {
207 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
210 static const char * MonthsOfTheYear[] = {
218 matchDay(const char * start, const char * end)
220 int len = end - start + 1;
222 for (int i = 0; i < 7; i++) {
223 if (strncmp(DaysOfTheWeek[i], start, len) == 0) {
232 matchMonth(const char * start, const char * end)
234 int len = end - start + 1;
236 for (int i = 0; i < 12; i++) {
237 if (strncmp(MonthsOfTheYear[i], start, len) == 0) {
246 parseTime(const char * start, const char * end, tm & val)
248 int size = end - start + 1;
250 // Time will be in the form hh:mm:ss where seconds are optional.
253 strncpy(num_buf, start, 2);
256 val.tm_hour = (int) strtol(num_buf, NULL, 10);
258 strncpy(num_buf, &start[3], 2);
261 val.tm_min = (int) strtol(num_buf, NULL, 10);
264 strncpy(num_buf, &start[6], 2);
266 val.tm_sec = (int) strtol(num_buf, NULL, 10);
275 static const char * TZNames[] = {
276 "EST", "CST", "MST", "PST"
279 static const char * TZNamesDST[] = {
280 "EDT", "CDT", "MDT", "PDT"
284 parseTZ(const char * start, const char * end)
286 int size = end - start + 1;
288 // There are 3 possibilities that we understand. There
289 // is the single letter military time zone. In that case
290 // Z is 0 UTC. A-M is -1 to -12, skipping J. N-Y is +1 to +12
293 // Let's start with that one because it is the easiest.
297 if (*start >= 'A' && *start <= 'I') {
298 hours_from = *start - 'A' + 1;
300 else if (*start >= 'L' && *start <= 'M') {
301 hours_from = *start - 'K' + 10;
303 else if (*start >= 'N' && *start <= 'Y') {
304 hours_from = ('N' - *start) - 1;
307 return(hours_from * 3600);
310 // The next option is one of the ANSI standard time zones. These
311 // are three-letter abbreviations that tell us whether DST is in effect.
312 // So, if we have a length of three, let's see if it is in the table.
314 // First normal zones.
316 for (i = 0; i < 4; i++) {
317 if (strncmp(start, TZNames[i], 3) == 0) {
318 return((5 + i) * -3600);
323 for (i = 0; i < 4; i++) {
324 if (strncmp(start, TZNames[i], 3) == 0) {
325 return((4 + i) * -3600);
330 // Finally we understand +/- HHMM from UTC.
332 int sign = (*start == '+') ? 1 : -1;
335 strncpy(num_buf, &start[1], 2);
337 int hours = (int) strtol(num_buf, NULL, 10);
339 strncpy(num_buf, &start[3], 2);
341 int minutes = (int) strtol(num_buf, NULL, 10);
343 return(sign * ((hours * 3600) + (minutes * 60)));
346 // We have no idea at this point, and it is very unlikely that the
347 // text is meaningful to the reader either. Set the zone to UTC and
348 // punt. It is also possible that the text is "UT" or "GMT" in which
349 // case offset 0 is the right answer.
355 RFCValue::toDate(void)
357 DtMailValueDate date;
358 const char * pos = _value;
361 memset(&date, 0, sizeof(date));
362 memset(&new_time, 0, sizeof(new_time));
365 date.dtm_tz_offset_secs = 0;
367 // Before doing anything, check to see if _value is valid.
368 // Some messages have no Date string. Return date with zeroed fields
371 if (!_value || (strlen(_value) == 0)) return (date);
373 // Find the first non-blank
374 for (; *pos && isspace((unsigned char)*pos); pos++) {
378 // There are usually no more than 6 tokens in an RFC date. We will
379 // have a few extras just in case we are given a weird string.
380 const char *token_begin[12];
381 const char *token_end[12];
384 // Look for the end of each token. Date tokens are white space
387 token_begin[n_tokens] = pos;
388 for (; *pos && !isspace((unsigned char)*pos); pos++) {
393 token_end[n_tokens++] = pos - 1;
396 token_end[n_tokens++] = pos;
399 for (; *pos && isspace((unsigned char)*pos); pos++) {
402 // This means the message is most likely corrupted so just bail out
407 // Some dates will have a comma after the day of the week. We
408 // want to remove that. It will always be the first token if
409 // we have the day of the week.
410 if (*token_end[0] == ',') {
418 // There are two possible formats, and many variations, that we
419 // will see in an RFC message. They are:
421 // Tue Oct 12 10:36:10 1993
422 // Tue, 12 Oct 1993 10:35:05 PDT
424 // The first is the 821 format put on by sendmail. The second is
425 // one of the many variants of the 822 format. The big difference
426 // we must detect is "mon dd time year" vs "dd mon year time tz"
428 // The first qualifier is usually the day of the week. For our purposes,
429 // we will simply throw it away. This information will be recomputed
430 // based on the date and time.
434 int day = matchDay(token_begin[this_token], token_end[this_token]);
440 // This token should either be a numeric day, or an alpha month.
441 // Let's see if it is a month. If so, we know what the rest of
442 // the date will look like.
444 int month = matchMonth(token_begin[this_token], token_end[this_token]);
446 new_time.tm_mon = month;
448 // Now should be the day of the month.
452 if (this_token == n_tokens) {
456 strncpy(num_buf, token_begin[this_token], 2);
458 new_time.tm_mday = (int) strtol(num_buf, NULL, 10);
461 if (this_token == n_tokens) {
465 parseTime(token_begin[this_token], token_end[this_token], new_time);
468 if (this_token == n_tokens) {
473 // Sometimes the Unix date will include the time zone.
475 if (isalpha(*token_begin[this_token])) {
477 if (this_token == n_tokens) {
482 strncpy(num_buf, token_begin[this_token], 4);
483 // Don't remove last digit from year and get bad dates in header.
484 num_buf[token_end[this_token] - token_begin[this_token] + 1] = 0;
485 new_time.tm_year = (int) strtol(num_buf, NULL, 10);
486 if (new_time.tm_year > 1900) {
487 new_time.tm_year -= 1900;
490 new_time.tm_isdst = -1;
491 date.dtm_date = SafeMktime(&new_time);
493 date.dtm_tz_offset_secs = timezone;
495 date.dtm_tz_offset_secs = new_time.tm_gmtoff;
499 // In this format, we should have a day of the month.
501 strncpy(num_buf, token_begin[this_token], 2);
503 new_time.tm_mday = (int) strtol(num_buf, NULL, 10);
506 if (this_token == n_tokens) {
510 // Now the month name.
511 new_time.tm_mon = matchMonth(token_begin[this_token], token_end[this_token]);
514 if (this_token == n_tokens) {
518 // The year, which is either 2 or 4 digits.
519 int t_size = token_end[this_token] - token_begin[this_token] + 1;
520 strncpy(num_buf, token_begin[this_token], t_size);
522 new_time.tm_year = (int) strtol(num_buf, NULL, 10);
523 if (new_time.tm_year > 1900) {
524 new_time.tm_year -= 1900;
528 if (this_token == n_tokens) {
532 // The time, in the specified time zone.
533 parseTime(token_begin[this_token], token_end[this_token], new_time);
536 if (this_token == n_tokens) {
540 time_t offset = parseTZ(token_begin[this_token], token_end[this_token]);
543 time_t orig_zone = timezone;
546 // Tell "mktime" to figure "dst" on or not.
547 new_time.tm_isdst = -1;
549 date.dtm_date = SafeMktime(&new_time);
550 date.dtm_tz_offset_secs = offset;
553 timezone = orig_zone;
561 findParenComment(const char * value)
565 for (sparen = value; *sparen; sparen++) {
566 // We must ignore stuff in quotes.
568 if (*sparen == '"') {
582 if (*sparen == '(') {
587 if (*sparen != '(') {
593 for (lparen = (sparen + 1); *lparen; lparen++) {
594 // We will support nested comments of the form (Joe (Hi) Blow)
596 if (*lparen == '(') {
601 if (*lparen == ')') {
610 if (*lparen != ')') {
614 char * comment = (char *)malloc(lparen - sparen + 1);
615 memcpy(comment, (sparen + 1), lparen - sparen - 1);
616 comment[lparen - sparen - 1] = 0;
622 stripAngleAddr(const char * value)
627 for (lt = value; *lt; lt++)
631 if (in_quote) in_quote = 0;
635 if (in_quote) continue;
636 if (*lt == '<') break;
639 if (*lt != '<') return(NULL);
644 for (gt = (lt + 1); *gt; gt++)
648 if (in_quote) in_quote = 0;
652 if (in_quote) continue;
653 if (*gt == '>') break;
657 if (*gt != '>') return(NULL);
659 // Copy everything not in the angle brackets.
661 char * name = (char *)malloc(strlen(value) + 1);
664 for (const char * cp = value; *cp; cp++)
666 if (cp >= lt && cp <= gt) continue;
673 if (strlen(name) == 0)
683 stripQuotesWhiteSpace(const char * value)
685 int found_alphanum = 0;
690 // Skip past leading white space.
692 const char *cp = value;
693 while (isspace(*cp)) cp++;
696 // If there are no quotes, copy and return.
705 // Strip out the quotes.
709 out = name = (char*) malloc(strlen(value)+1);
710 if (NULL == out) return NULL;
724 RFCValue::toAddress(void)
726 // Count the commas, to figure out how big to make the
730 for (const char * comma = _value; *comma; comma++)
731 if (*comma == ',') commas += 1;
733 DtMailAddressSeq * seq = new DtMailAddressSeq(commas);
735 if (!_decoded) decodeValue();
737 RFCTransport::arpaPhrase(_decoded, *seq);
739 // If we have only one address, then let's try to find a comment
740 // so the person can be set. This is trivial to do for one address
741 // and can have a win for displaying the headers in the message
744 if (seq->length() == 1)
746 // This is less than perfect, but we will look for (Name) and
747 // use it first. If we can't find that, then see if we can
748 // find something outside <addr>. If not that, then simply
751 DtMailValueAddress * addr = (*seq)[0];
753 addr->dtm_person = findParenComment(_decoded);
754 if (!addr->dtm_person)
756 char *name = stripAngleAddr(_decoded);
759 addr->dtm_person = stripQuotesWhiteSpace(name);
775 RFCValue::decodeValue(void)
777 // Create the output buffer. We will assume that
778 // the header will only shrink by applying RFC1522.
780 int outleft = strlen(_value);
781 char * output = (char *)malloc(outleft + 2);
784 char * cur_c = output;
788 // Scan the value, looking for =? which indicates the start
789 // of an encoded string.
791 for (const char * in_c = _value; *in_c; in_c++) {
792 if (*in_c == '=' && *(in_c + 1) == '?') {
794 // Decode the encoding. Return the last character so the loop
795 // continues to work. Also reset cur_c because the output buffer
798 // Allocate space for buf to contain rest of output because it
799 // is enough space for the decoded quoted-printable or base64.
800 // If codeset conversion is done, then csConvert will re-allocate
803 size_t _valueLen = strlen(_value);
804 const char *in_c_sav = in_c;
806 buf = (char *)malloc(outleft + 2);
808 in_c = decode1522(in_c, _value + _valueLen - 1, &buf, _session);
810 if (in_c > in_c_sav) {
811 size_t bufLen = strlen(buf);
812 if (bufLen > outleft) {
814 (char*) realloc((char*)output, _valueLen + bufLen + 2);
817 strncat(output, buf, bufLen);
818 cur_c = output + strlen(output);
826 // Just copy the byte and reset the null pointer, unless
827 // we are dealing with carriage return.
831 output = (char*) realloc((char*) output, strlen(output) * 2);
832 outleft = strlen(output);
841 // Kill any trailing white space.
845 cur_c >= output && isspace((unsigned char)*cur_c);