/* * CDE - Common Desktop Environment * * Copyright (c) 1993-2012, The Open Group. All rights reserved. * * These libraries and programs are free software; you can * redistribute them and/or modify them under the terms of the GNU * Lesser General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) * any later version. * * These libraries and programs are distributed in the hope that * they will be useful, but WITHOUT ANY WARRANTY; without even the * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU Lesser General Public License for more * details. * * You should have received a copy of the GNU Lesser General Public * License along with these libraries and programs; if not, write * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth * Floor, Boston, MA 02110-1301 USA */ /* *+SNOTICE * * * $TOG: RFCMailValues.C /main/10 1998/09/03 07:01:06 mgreess $ * * RESTRICTED CONFIDENTIAL INFORMATION: * * The information in this document is subject to special * restrictions in a confidential disclosure agreement bertween * HP, IBM, Sun, USL, SCO and Univel. Do not distribute this * document outside HP, IBM, Sun, USL, SCO, or Univel wihtout * Sun's specific written approval. This documment and all copies * and derivative works thereof must be returned or destroyed at * Sun's request. * * Copyright 1993 Sun Microsystems, Inc. All rights reserved. * *+ENOTICE */ #ifndef I_HAVE_NO_IDENT #endif #include #include #include #include #include #include #include #include "RFCImpl.hh" #include "RFCMIME.hh" #include "str_utils.h" // String values. These assume an RFC format for now. They will // apply RFC1522 coding rules to the strings for dealing with // non-ASCII text in RFC headers. // RFCValue::RFCValue(const char * str, int size) : DtMailValue(NULL) { _value = (char *)malloc(size + 1); memcpy(_value, str, size); _value[size] = 0; _decoded = NULL; _session = NULL; } RFCValue::RFCValue(const char * str, int size, DtMail::Session *s) : DtMailValue(NULL) { _value = (char *)malloc(size + 1); memcpy(_value, str, size); _value[size] = 0; _decoded = NULL; _session = s; } RFCValue::~RFCValue(void) { if (_decoded) { free(_decoded); } } static const char * decode1522(const char * enc_start, const char * max_end, char **output, DtMail::Session *s) { // Find the end of the encoded region. // int qs = 0; const char *enc_end; for (enc_end = enc_start; *enc_end && enc_end < max_end; enc_end++) { if (*enc_end == '?') { qs += 1; if (qs > 3 && *(enc_end + 1) == '=') { break; } } } if (*enc_end != '?') { return(enc_start); } enc_end += 1; // Pull off the char set name. // const char *cs_end; for (cs_end = enc_start + 2; *cs_end != '?'; cs_end++) { continue; } int cs_name_length = cs_end - enc_start - 2; char *cs_name = (char*) malloc(cs_name_length + 1); strncpy(cs_name, enc_start + 2, cs_name_length); cs_name[cs_name_length] = 0; // Set the encoding method and start of buffer. // char encoding = *(cs_end + 1); const char * buf_start = cs_end + 3; switch (toupper(encoding)) { case 'Q': { int len = 0; RFCMIME::readQPrint(*output, len, buf_start, enc_end - buf_start - 1); (*output)[len] = 0; break; } case 'B': { int len = 0; RFCMIME::readBase64(*output, len, buf_start, enc_end - buf_start - 1); (*output)[len] = 0; break; } default: // Invalid encoding. Assume a false match. free(cs_name); return (enc_start); } // Do codeset conversion if charset is present char *from_cs = s->csToConvName(cs_name); char *to_cs = s->locToConvName(); if ( from_cs && to_cs ) { if ( strcasecmp(from_cs, to_cs) != 0 ) { unsigned long tmplen = (unsigned long) strlen(*output); (void) s->csConvert(&(*output), tmplen, 1, from_cs, to_cs); } } if (NULL != from_cs) free( from_cs ); if (NULL != to_cs) free ( to_cs ); free(cs_name); return(enc_end); } RFCValue::operator const char *(void) { if (_decoded) { return(_decoded); } decodeValue(); return(_decoded); } const char * RFCValue::operator= (const char * str) { if (_decoded) { free(_decoded); _decoded = NULL; } if (_value) { free(_value); } _value = strdup(str); return(_value); } static const char * DaysOfTheWeek[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; static const char * MonthsOfTheYear[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; static int matchDay(const char * start, const char * end) { int len = end - start + 1; for (int i = 0; i < 7; i++) { if (strncmp(DaysOfTheWeek[i], start, len) == 0) { return(i); } } return(-1); } static int matchMonth(const char * start, const char * end) { int len = end - start + 1; for (int i = 0; i < 12; i++) { if (strncmp(MonthsOfTheYear[i], start, len) == 0) { return(i); } } return(-1); } static void parseTime(const char * start, const char * end, tm & val) { int size = end - start + 1; // Time will be in the form hh:mm:ss where seconds are optional. char num_buf[10]; strncpy(num_buf, start, 2); num_buf[2] = 0; val.tm_hour = (int) strtol(num_buf, NULL, 10); strncpy(num_buf, &start[3], 2); num_buf[2] = 0; val.tm_min = (int) strtol(num_buf, NULL, 10); if (size > 6) { strncpy(num_buf, &start[6], 2); num_buf[2] = 0; val.tm_sec = (int) strtol(num_buf, NULL, 10); } else { val.tm_sec = 0; } return; } static const char * TZNames[] = { "EST", "CST", "MST", "PST" }; static const char * TZNamesDST[] = { "EDT", "CDT", "MDT", "PDT" }; static time_t parseTZ(const char * start, const char * end) { int size = end - start + 1; // There are at 3 possibilities that we understand. There // is the single letter military time zone. In that case // Z is 0 UTC. A-M is -1 to -12, skipping J. N-Y is +1 to +12 // from UTC. // // Lets start with that one because it is the easiest. if (size == 1) { int hours_from = 0; if (*start >= 'A' && *start <= 'I') { hours_from = *start - 'A' + 1; } else if (*start >= 'L' && *start <= 'M') { hours_from = *start - 'K' + 10; } else if (*start >= 'N' && *start <= 'Y') { hours_from = ('N' - *start) - 1; } return(hours_from * 3600); } // The next option is one of the ANSI standard time zones. These // are three letter abbrievations that tell us where DST in in effect. // So, if we have a length of three, lets see if it is in the table. if (size == 3) { // First normal zones. int i; for (i = 0; i < 4; i++) { if (strncmp(start, TZNames[i], 3) == 0) { return((5 + i) * -3600); } } // Now DST zones for (i = 0; i < 4; i++) { if (strncmp(start, TZNames[i], 3) == 0) { return((4 + i) * -3600); } } } // Finally we understand +/- HHMM from UTC. if (size == 5) { int sign = (*start == '+') ? 1 : -1; char num_buf[10]; strncpy(num_buf, &start[1], 2); num_buf[2] = 0; int hours = (int) strtol(num_buf, NULL, 10); strncpy(num_buf, &start[3], 2); num_buf[2] = 0; int minutes = (int) strtol(num_buf, NULL, 10); return(sign * ((hours * 3600) + (minutes * 60))); } // We have no idea at this point, and it is very unlikely that the // text is meaningful to the reader either. Set the zone to UTC and // punt. It is also possible that the text is "UT" or "GMT" in which // case offset 0 is the right answer. return(0); } DtMailValueDate RFCValue::toDate(void) { DtMailValueDate date; const char * pos = _value; tm new_time; memset(&date, 0, sizeof(date)); memset(&new_time, 0, sizeof(new_time)); date.dtm_date = 0; date.dtm_tz_offset_secs = 0; // Before doing anything, check to see if _value is valid. // Some messages have no Date string. Return date with zeroed fields // in those cases. if (!_value || (strlen(_value) == 0)) return (date); // Find the first non-blank for (; *pos && isspace((unsigned char)*pos); pos++) { continue; } // There are usually no more than 6 tokens in an RFC date. We will // have a few extras just in case we are given a weird string. const char *token_begin[12]; const char *token_end[12]; int n_tokens = 0; // Look for the end of each token. Date tokens are white space // separated. while (*pos) { token_begin[n_tokens] = pos; for (; *pos && !isspace((unsigned char)*pos); pos++) { continue; } if (*pos) { token_end[n_tokens++] = pos - 1; } else { token_end[n_tokens++] = pos; } for (; *pos && isspace((unsigned char)*pos); pos++) { continue; } // This means the message is most likely corrupted so just bail out if (n_tokens == 12) break; } // Some dates will have a comma after the day of the week. We // want to remove that. It will always be the first token if // we have the day of the week. if (*token_end[0] == ',') { token_end[0]--; } if (n_tokens < 2) { return(date); } // There are two possible formats, and many variations, that we // will see in an RFC message. They are: // // Tue Oct 12 10:36:10 1993 // Tue, 12 Oct 1993 10:35:05 PDT // // The first is the 821 format put on by sendmail. The second is // one of the many variants of the 822 format. The big difference // we must detect is "mon dd time year" vs "dd mon year time tz" // // The first qualifier is usually the day of the week. For our purposes, // we will simply throw it away. This information will be recomputed // based on the date and time. int this_token = 0; int day = matchDay(token_begin[this_token], token_end[this_token]); if (day >= 0) { // Ignore the day. this_token += 1; } // This token should either be a numeric day, or an alpha month. // Lets see if it is a month. If so, we know what the rest of // the date will look like. int month = matchMonth(token_begin[this_token], token_end[this_token]); if (month >= 0) { new_time.tm_mon = month; // Now should be the day of the month. char num_buf[20]; this_token += 1; if (this_token == n_tokens) { return(date); } strncpy(num_buf, token_begin[this_token], 2); num_buf[2] = 0; new_time.tm_mday = (int) strtol(num_buf, NULL, 10); this_token += 1; if (this_token == n_tokens) { return(date); } parseTime(token_begin[this_token], token_end[this_token], new_time); this_token += 1; if (this_token == n_tokens) { return(date); } // Sometimes the Unix date will include the time zone. // if (isalpha(*token_begin[this_token])) { this_token += 1; if (this_token == n_tokens) { return(date); } } strncpy(num_buf, token_begin[this_token], 4); // Don't remove last digit from year and get bad dates in header. num_buf[token_end[this_token] - token_begin[this_token] + 1] = 0; new_time.tm_year = (int) strtol(num_buf, NULL, 10); if (new_time.tm_year > 1900) { new_time.tm_year -= 1900; } new_time.tm_isdst = -1; date.dtm_date = SafeMktime(&new_time); #ifdef SVR4 date.dtm_tz_offset_secs = timezone; #else date.dtm_tz_offset_secs = new_time.tm_gmtoff; #endif } else { // In this format, we should have a day of the month. char num_buf[20]; strncpy(num_buf, token_begin[this_token], 2); num_buf[2] = 0; new_time.tm_mday = (int) strtol(num_buf, NULL, 10); this_token += 1; if (this_token == n_tokens) { return(date); } // Now the month name. new_time.tm_mon = matchMonth(token_begin[this_token], token_end[this_token]); this_token += 1; if (this_token == n_tokens) { return(date); } // The year, which is either 2 or 4 digits. int t_size = token_end[this_token] - token_begin[this_token] + 1; strncpy(num_buf, token_begin[this_token], t_size); num_buf[t_size] = 0; new_time.tm_year = (int) strtol(num_buf, NULL, 10); if (new_time.tm_year > 1900) { new_time.tm_year -= 1900; } this_token += 1; if (this_token == n_tokens) { return(date); } // The time, in the specified time zone. parseTime(token_begin[this_token], token_end[this_token], new_time); this_token += 1; if (this_token == n_tokens) { return(date); } time_t offset = parseTZ(token_begin[this_token], token_end[this_token]); #ifdef SVR4 time_t orig_zone = timezone; timezone = offset; #endif // Tell "mktime" to figure "dst" on or not. new_time.tm_isdst = -1; date.dtm_date = SafeMktime(&new_time); date.dtm_tz_offset_secs = offset; #ifdef SVR4 timezone = orig_zone; #endif } return(date); } static char * findParenComment(const char * value) { int in_quote = 0; const char *sparen; for (sparen = value; *sparen; sparen++) { // We must ignore stuff in quotes. // if (*sparen == '"') { if (in_quote) { in_quote = 0; } else { in_quote = 1; } continue; } if (in_quote) { continue; } if (*sparen == '(') { break; } } if (*sparen != '(') { return(NULL); } in_quote = 0; const char *lparen; for (lparen = (sparen + 1); *lparen; lparen++) { // We will support nested comments of the form (Joe (Hi) Blow) // if (*lparen == '(') { in_quote += 1; continue; } if (*lparen == ')') { in_quote -= 1; } if (in_quote < 0) { break; } } if (*lparen != ')') { return(NULL); } char * comment = (char *)malloc(lparen - sparen + 1); memcpy(comment, (sparen + 1), lparen - sparen - 1); comment[lparen - sparen - 1] = 0; return(comment); } static char * stripAngleAddr(const char * value) { int in_quote = 0; const char *lt; for (lt = value; *lt; lt++) { if (*lt == '"') { if (in_quote) in_quote = 0; else in_quote = 1; continue; } if (in_quote) continue; if (*lt == '<') break; } if (*lt != '<') return(NULL); in_quote = 0; const char *gt; for (gt = (lt + 1); *gt; gt++) { if (*gt == '"') { if (in_quote) in_quote = 0; else in_quote = 1; continue; } if (in_quote) continue; if (*gt == '>') break; } if (*gt != '>') return(NULL); // Copy everything not in the angle brackets. // char * name = (char *)malloc(strlen(value) + 1); char * out = name; for (const char * cp = value; *cp; cp++) { if (cp >= lt && cp <= gt) continue; *out++ = *cp; } *out = 0; if (strlen(name) == 0) { free(name); return(NULL); } return(name); } static char * stripQuotesWhiteSpace(const char * value) { int found_alphanum = 0; char *name = NULL; char *out = NULL; // // Skip past leading white space. // const char *cp = value; while (isspace(*cp)) cp++; // // If there are no quotes, copy and return. // if (*cp != '"') { name = strdup(cp); return name; } // // Strip out the quotes. // cp++; out = name = (char*) malloc(strlen(value)+1); if (NULL == out) return NULL; while (*cp != '"') { *out = *cp; out++; cp++; } *out = 0; return name; } DtMailAddressSeq * RFCValue::toAddress(void) { // Count the commas, to figure out how big to make the // sequence. // int commas = 3; for (const char * comma = _value; *comma; comma++) if (*comma == ',') commas += 1; DtMailAddressSeq * seq = new DtMailAddressSeq(commas); if (!_decoded) decodeValue(); RFCTransport::arpaPhrase(_decoded, *seq); // If we have only one address, then let's try to find a comment // so the person can be set. This is trivial to do for one address // and can have a win for displaying the headers in the message // scrolling list. // if (seq->length() == 1) { // This is less than perfect, but we will look for (Name) and // use it first. If we can't find that, then see if we can // find something outside . If not that, then simply // give up. // DtMailValueAddress * addr = (*seq)[0]; addr->dtm_person = findParenComment(_decoded); if (!addr->dtm_person) { char *name = stripAngleAddr(_decoded); if (name) { addr->dtm_person = stripQuotesWhiteSpace(name); free(name); } } } return(seq); } const char * RFCValue::raw(void) { return(_value); } void RFCValue::decodeValue(void) { // Create the output buffer. We will assume that it is // the header will only shrink by applying RFC1522. // int outleft = strlen(_value); char * output = (char *)malloc(outleft + 2); *output = 0; char * cur_c = output; char *buf = NULL; // Scan the value, looking for =? which indicates the start // of a encoded string. // for (const char * in_c = _value; *in_c; in_c++) { if (*in_c == '=' && *(in_c + 1) == '?') { // // Decode the encoding. Return the last character so the loop // continues to work. Also reset cur_c because the output buffer // has been updated. // // Allocate space for buf to contain rest of output because it // is enough space for the decoded quoted-printable or base64. // If codeset conversion is done, then csConvert will re-allocate // enough space. // size_t _valueLen = strlen(_value); const char *in_c_sav = in_c; buf = (char *)malloc(outleft + 2); strcpy(buf, in_c); in_c = decode1522(in_c, _value + _valueLen - 1, &buf, _session); if (in_c > in_c_sav) { size_t bufLen = strlen(buf); if (bufLen > outleft) { output = (char*) realloc((char*)output, _valueLen + bufLen + 2); outleft += bufLen; } strncat(output, buf, bufLen); cur_c = output + strlen(output); outleft -= bufLen; free(buf); continue; } free(buf); } // Just copy the byte and reset the null pointer, unless // we are dealing with carriage return. // if (*in_c != '\r') { if (outleft == 0) { output = (char*) realloc((char*) output, strlen(output) * 2); outleft = strlen(output); } *cur_c = *in_c; cur_c++; *cur_c = 0; outleft--; } } // Kill any trailing white space. // *cur_c = 0; for (cur_c -= 1; cur_c >= output && isspace((unsigned char)*cur_c); cur_c--) { *cur_c = 0; } _decoded = output; }