/* * CDE - Common Desktop Environment * * Copyright (c) 1993-2012, The Open Group. All rights reserved. * * These libraries and programs are free software; you can * redistribute them and/or modify them under the terms of the GNU * Lesser General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) * any later version. * * These libraries and programs are distributed in the hope that * they will be useful, but WITHOUT ANY WARRANTY; without even the * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU Lesser General Public License for more * details. * * You should have received a copy of the GNU Lesser General Public * License along with these libraries and programs; if not, write * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth * Floor, Boston, MA 02110-1301 USA */ /* *+SNOTICE * * * $TOG: RFCMessage.C /main/9 1998/04/06 13:29:56 mgreess $ * * RESTRICTED CONFIDENTIAL INFORMATION: * * The information in this document is subject to special * restrictions in a confidential disclosure agreement bertween * HP, IBM, Sun, USL, SCO and Univel. Do not distribute this * document outside HP, IBM, Sun, USL, SCO, or Univel wihtout * Sun's specific written approval. This documment and all copies * and derivative works thereof must be returned or destroyed at * Sun's request. * * Copyright 1993 Sun Microsystems, Inc. All rights reserved. * *+ENOTICE */ #include #include #include #include #include #include #include "RFCImpl.hh" #include #include "str_utils.h" const char * RFCDeleteHeader = "X-Dt-Delete-Time"; static unsigned long RFCMessageSignature = 0xff83421e; // RFCMessage constructor for alternative message parts // Arguments: // error -- // alt_start -- beginning of alternative message (at interpart boundary) // alt_end -- last byte of alternative message // Outputs: // RFCMessage * (with _alternativeMessage == DTM_TRUE) that has a completely // parsed and setup message with body parts and minimal envelope. // Description: // This constructor is used to create a message that is one of possibly // many alternative messages that are part of a single MIME multipart // alternative message. // RFCMessage::RFCMessage(DtMailEnv & error, const char * alt_start, const char * alt_end) : DtMail::Message(error, NULL), _bp_cache(8), _alt_msg_cache(0) { error.clear(); // Initialize objects within RFCMessage structure // _object_signature = RFCMessageSignature; _dirty = 0; // can never be dirty _alternativeMultipart = DTM_FALSE; // msg does not contain alternatives _alternativeMessage = DTM_TRUE; // msg is an alternative message _alternativeValid = DTM_FALSE; // msg is not valid (maybe later) _msg_start = alt_start; _msg_end = alt_end; _msg_buf = NULL; _body_start = NULL; // parse the message, creating an envelope to encompass the first headers // found and setting up the body part boundaries // (void) parseMsg(error, (char *) alt_end); if (error.isSet()) { return; } // parse the message bodies - this constructor is currently only called // for a MIME multipart alternative message part. If other mail protocols // are added that have similar functionality, this constructor will have // to be augmented to take this into consideration // parseMIMEBodies(error); if (error.isSet()) { return; } // it worked! return; } RFCMessage::RFCMessage(DtMailEnv & error, DtMail::MailBox * parent, const char ** start, const char * end_of_file) : DtMail::Message(error, parent), _bp_cache(8), _alt_msg_cache(8) { _object_signature = RFCMessageSignature; error.clear(); if (_parent) { _session = _parent->session(); } _dirty = 0; _alternativeMultipart = DTM_FALSE; _alternativeMessage = DTM_FALSE; _alternativeValid = DTM_FALSE; _msg_start = *start; *start = parseMsg(error, end_of_file); // We want to be lazy about parsing the body. Most messages are // never viewed in a folder. We have already hit the pages once, // but with MADV_SEQUENTIAL on, they are marked for immediate // disposal so they may be gone. Of course we are going to slow // the first display of the message, but odds are that will never // happen anyway. // // There is another reason for delaying the parsing on an MT hot // platform. We are parsing on a separate thread. If we wait until // we are asked for information about the bodies to parse them, // we can get the main thread to do this work. So, we can actually // get some concurrent parsing to occur. // // Conclusion: Lazy is better! // return; } RFCMessage::RFCMessage(DtMailEnv & error, DtMail::Session * session, DtMailObjectSpace space, void * arg, DtMailCallback, void *) : DtMail::Message(error, NULL), _bp_cache(8), _alt_msg_cache(8) { // We do different processing, depending on the space for the // object. // _session = session; switch (space) { case DtMailBufferObject: { _object_signature = RFCMessageSignature; error.clear(); _dirty = 0; _alternativeMultipart = DTM_FALSE; _alternativeMessage = DTM_FALSE; _alternativeValid = DTM_FALSE; _msg_buf = (DtMailBuffer *)arg; // If the buffer is not null, then we need to parse the // buffer. // if (_msg_buf->size > 0) { if (hasHeaders((const char *)_msg_buf->buffer, _msg_buf->size) == DTM_FALSE) { error.setError(DTME_UnknownFormat); return; } _msg_start = (char *)_msg_buf->buffer; (void) parseMsg(error, (char *)_msg_buf->buffer + _msg_buf->size); if (error.isSet()) { return; } } else { // We need to construct an empty message. // _msg_start = _msg_end = _body_start = NULL; _envelope = new RFCEnvelope(error, this, NULL, 0); } } break; case DtMailFileObject: default: error.setError(DTME_NotSupported); } } RFCMessage::~RFCMessage(void) { if (_object_signature == RFCMessageSignature) { MutexLock lock_scope(_obj_mutex); _object_signature = 0; // spin through the body part cache deleting all parts cached. // If _alternativeMultipart is set, this is the "main message" and // as such all of the body parts are merely copied from one of the // underlying alternative messages, so in this case we only delete the // body part cache and not the contents of the cache, as that will be // taken care of when the alternative message cache is purged. // while (_bp_cache.length()) { BodyPartCache * bpc = _bp_cache[0]; if (!_alternativeMultipart) delete bpc->body; delete bpc; _bp_cache.remove(0); } // spin through the alternative message cache destroying any // alternative messages which have been built to support a // multipart alternative message // if (_alternativeMultipart == DTM_TRUE) while (_alt_msg_cache.length()) { AlternativeMessageCache * amc = _alt_msg_cache[0]; assert(amc != NULL); assert(amc->amc_msg != NULL); delete amc->amc_msg; assert(amc->amc_body != NULL); delete amc->amc_body; delete amc; _alt_msg_cache.remove(0); } } } void RFCMessage::toBuffer(DtMailEnv & error, DtMailBuffer & buf) { error.clear(); buf.size = _msg_end - _msg_start + 1; buf.buffer = new char[buf.size]; if (buf.buffer == NULL) { error.setError(DTME_NoMemory); return; } memcpy(buf.buffer, _msg_start, (size_t) buf.size); } const char * RFCMessage::impl(DtMailEnv & error) { // By definition, we are contained within some RFC container. We // want to make sure we return the exact same string, so just return // what our containing parent uses for its impl ID string. // if (_parent) { return(_parent->impl(error)); } return("Internet MIME"); } DtMail::Envelope * RFCMessage::getEnvelope(DtMailEnv & error) { error.clear(); return(_envelope); } int RFCMessage::getBodyCount(DtMailEnv & error) { error.clear(); // We may need to parse the body! // if (_bp_cache.length() == 0) { parseBodies(error); } return(_bp_cache.length()); } DtMail::BodyPart * RFCMessage::getFirstBodyPart(DtMailEnv & error) { error.clear(); // We may need to parse the body! // if (_bp_cache.length() == 0 && _msg_start) { parseBodies(error); } if (_bp_cache.length() == 0) { return(NULL); } return(bodyPart(error, 0)); } DtMail::BodyPart * RFCMessage::getNextBodyPart(DtMailEnv & error, DtMail::BodyPart * last) { error.clear(); int slot = lookupByBody(last); if (slot < 0 || (slot + 1) >= _bp_cache.length()) { return(NULL); } slot += 1; return(bodyPart(error, slot)); } DtMail::BodyPart * RFCMessage::newBodyPart(DtMailEnv & error, DtMail::BodyPart * after) { error.clear(); int slot = -1; if (after) { slot = lookupByBody(after); } RFCBodyPart * bp = new MIMEBodyPart(error, this, NULL, 0, NULL); BodyPartCache * bpc = new BodyPartCache; bpc->body = bp; bpc->body_start = NULL; if (slot < 0) { _bp_cache.append(bpc); } else { _bp_cache.insert(bpc, slot + 1); } return(bp); } void RFCMessage::setFlag(DtMailEnv & error, const DtMailMessageState flag) { error.clear(); time_t now; char str_time[20]; switch (flag) { case DtMailMessageNew: _envelope->setHeader(error, "Status", DTM_TRUE, "NR"); break; case DtMailMessageDeletePending: // Start the delete time out by setting the X-Delete-Time header. // now = time(NULL); sprintf(str_time, "%08lX", (long)now); _envelope->setHeader(error, RFCDeleteHeader, DTM_TRUE, str_time); break; case DtMailMessagePartial: // // There is nothing to do for partial. It is already // marked as partial as it has the Content-Type: message/partial // header. This case statment exists so that SetFlag will not // drop into the default 'error' below. break; default: error.setError(DTME_OperationInvalid); break; } return; } void RFCMessage::resetFlag(DtMailEnv & error, const DtMailMessageState flag) { error.clear(); switch (flag) { case DtMailMessageNew: _envelope->setHeader(error, "Status", DTM_TRUE, "RO"); break; case DtMailMessageDeletePending: _envelope->removeHeader(error, RFCDeleteHeader); break; case DtMailMessagePartial: // // There is nothing to do for partial. It is already // marked as partial as it has the Content-Type: message/partial // header. This case statment exists so that resetFlag will not // drop into the default 'error' below. // // You can not 'reset' message partial, you can only // delete the header. break; default: error.setError(DTME_OperationInvalid); } return; } DtMailBoolean RFCMessage::flagIsSet(DtMailEnv & error, const DtMailMessageState flag) { DtMailValueSeq value; DtMailBoolean answer = DTM_FALSE; switch (flag) { case DtMailMessageNew: _envelope->getHeader(error, "Status", DTM_FALSE, value); if (error.isNotSet()) { const char * status = *(value[0]); if (strcasecmp(status, "ro")) { answer = DTM_TRUE; } } else { // No Status: means a new message. error.clear(); answer = DTM_TRUE; } break; case DtMailMessageDeletePending: _envelope->getHeader(error, RFCDeleteHeader, DTM_FALSE, value); if (error.isNotSet()) { answer = DTM_TRUE; } else { error.clear(); } break; case DtMailMessageMultipart: _envelope->getHeader(error, "Content-Type", DTM_FALSE, value); if (error.isNotSet()) { const char * type = *(value[0]); if (strcasecmp(type, "X-Sun-Attachment") == 0 || strncasecmp(type, "multipart", 9) == 0) { answer = DTM_TRUE; } } else { error.clear(); } break; case DtMailMessagePartial: _envelope->getHeader(error, "Content-Type", DTM_FALSE, value); if (error.isNotSet()) { const char * type = *(value[0]); if (strncasecmp(type, "message/partial", 15) == 0) { answer = DTM_TRUE; } } else { error.clear(); } break; default: break; } error.clear(); return(answer); } time_t RFCMessage::getDeleteTime(DtMailEnv & error) { time_t delete_time = 0; DtMailValueSeq value; _envelope->getHeader(error, RFCDeleteHeader, DTM_FALSE, value); if (error.isNotSet()) { delete_time = (time_t) strtol(*(value[0]), NULL, 16); } error.clear(); return(delete_time); } void RFCMessage::markDirty(const int delta) { if (_parent) { ((RFCMailBox *)_parent)->markDirty(delta); } _dirty += delta; if (!_dirty) { _dirty += 1; } } // Function: RFCMessage::fixMessageLocation - return address/length of message // Description: // This function is used to obtain an address and length in bytes of a // message. // Method: // If the message is not dirty (e.g. already mapped into memory as one // contiguous stream of bytes) then simply return its address and length; // otherwise, must construct the message into a temporary area, the address // and length of which is then returned. // Arguments: // long & msgLen -- (output) length of message in bytes // int & msgTemporary -- (output) 0 if message resides in previously // allocated memory (e.g. mapped already), // != 0 if temporary space allocated to hold // message, requiring later deallocation // Returns: // const char * -> first byte of message // void RFCMessage::fixMessageLocation(char ** msgHeaderStart, long & msgHeaderLen, char ** msgBodyStart, long &msgBodyLen, int & msgTemporary, long & msgBodyOffset) { if (!_dirty) { // This is easy as the message is complete as is, simply return // its current memory location and size in bytes. The message is // "permanent" in that it does not currently reside in a temporary // memory address which later has to be deallocated. // *msgHeaderStart = (char *)_msg_start; msgHeaderLen = _body_start - _msg_start; *msgBodyStart = (char *)_body_start; msgBodyLen = (_msg_end - _body_start)+1; msgTemporary = 0; msgBodyOffset = 0; // no header offset fudge factor return; } // Well, now it gets tricky. The message is not assembled in one // place in memory, which prevents fixing an address and length // for it, so we have to force the message to be constructed. This // is done in a "temporary" memory area which has to be deallocated // when the message is mapped in from a real file. // pinMessageDown(msgHeaderStart, msgHeaderLen, msgBodyStart, msgBodyLen); msgBodyOffset = msgHeaderLen-(_body_start-_msg_start); msgTemporary = 1; return; } // Function: RFCMessage::pinMessageDown - fix complete message in memory // Description: // This function is used on a "dirty" message to cause a complete // in memory copy of the message to be created so that it can be // written out (probably as a result of a mailbox save operation). // Method: // Arguments: // long & msgLen -- (output) length of message in bytes // long & msgBodyOffset -- (output) +- adjustment (between original message // and new message) of the first byte at which the // body of the message begins -- this lets us determine // the body start in cases where the headers are changed // without reparsing the message // Returns: // const char * -> first byte of message pinned in memory // void RFCMessage::pinMessageDown(char ** msgHeaderStart, long & msgHeaderLen, char ** msgBodyStart, long &msgBodyLen) { // First, determine which write method to use. We have // 2 choices, Sun V3, and MIME. We will need to compute // the size of the message, then make another pass to // actually store the body. // DtMailEnv error; int content_length; DtMailValueSeq value; // Compute content length of message // _envelope->getHeader(error, "Mime-Version", DTM_FALSE, value); if (error.isNotSet()) { content_length = sizeMIMEBodies(error); } else { error.clear(); content_length = sizeV3Bodies(error); } content_length = content_length < 0 ? 0 : content_length; char len_buf[20]; sprintf(len_buf, "%d", content_length); _envelope->setHeader(error, "Content-Length", DTM_TRUE, len_buf); // Allocate storage for the headers and write headers into it // const size_t maxHeaderLength = (size_t) ((RFCEnvelope *)_envelope)->headerLength(); const size_t fudgeAtEnd = 102; // two extra \n's at end of msg + slop size_t msgNewHeaderSize = (maxHeaderLength+fudgeAtEnd); *msgHeaderStart = (char *)malloc(msgNewHeaderSize); assert(*msgHeaderStart != NULL); char * end = ((RFCEnvelope *)_envelope)->writeHeaders(*msgHeaderStart); end += 1; *end++ = '\n'; msgHeaderLen = end-*msgHeaderStart; // Make sure we did not overflow the allocated area // assert(msgHeaderLen < msgNewHeaderSize); // As of this implementation, body parts can never change, so we do not have to // worry about the body part needing reconstruction // *msgBodyStart = (char *)_body_start; msgBodyLen = (_msg_end - _body_start)+1; return; } void RFCMessage::adjustMessageLocation(char * oldStart, char * newStart, long newLength, int msgTemporary, long newBodyOffset) { MutexLock lock_scope(_obj_mutex); assert((_dirty && msgTemporary) || (!_dirty && !msgTemporary)); // At this point the message has been mapped into a new location in memory // Must adjust all pointers to data contained within this message // int len = (int) ((_msg_end - _msg_start) + 1 + newBodyOffset); assert(len == newLength); _body_start = (_body_start - _msg_start) + newStart + newBodyOffset; _msg_end = newStart + len - 1; ((RFCEnvelope *)_envelope)->adjustHeaderLocation(newStart, (int)(_body_start-newStart)); int bpMaxSlot = _bp_cache.length(); int bp; for (bp = 0; bp < bpMaxSlot; bp++) { char * new_loc = newStart + (_bp_cache[bp]->body_start - _msg_start) + newBodyOffset; _bp_cache[bp]->body->adjustBodyPartsLocation(new_loc); _bp_cache[bp]->body_start = new_loc; } // If this is the main portion of a multipart alternative message, // we must handle each alternative and all of the body parts that // comprise each alternative // if (_alternativeMultipart == DTM_TRUE) { // Multipart Alternative message parent body // Adjust all of the body parts cached for the alternatives // int altCacheMaxSlot = _alt_msg_cache.length(); for (int altCacheSlot = 0; altCacheSlot < altCacheMaxSlot; altCacheSlot++) { AlternativeMessageCache *amc = _alt_msg_cache[altCacheSlot]; char *altNewStart = (((RFCEnvelope *) amc->amc_msg->_envelope)->headerLocation() - _msg_start) + newStart + newBodyOffset; int altHeaderLength = (int) ((RFCEnvelope *) amc->amc_msg->_envelope)->headerLength(); ((RFCEnvelope *) amc->amc_msg->_envelope)->adjustHeaderLocation(altNewStart, altHeaderLength); bpMaxSlot = amc->amc_msg->_bp_cache.length(); for (bp = 0; bp < bpMaxSlot; bp++) { char *new_loc = newStart + (amc->amc_msg->_bp_cache[bp]->body_start - _msg_start) + newBodyOffset; amc->amc_msg->_bp_cache[bp]->body->adjustBodyPartsLocation(new_loc); amc->amc_msg->_bp_cache[bp]->body_start = new_loc; } } } _msg_start = newStart; // If this message is "temporary", it was dirty at the time its location // was fixed, and therefore was placed into an area allocated on the fly. // Since the message and all associated pointers no longer refer to this // area, it must be free'd // if (msgTemporary) free(oldStart); _dirty = 0; } void RFCMessage::unfixMessageLocation(char * msgStart, int msgTemporary) { MutexLock lock_scope(_obj_mutex); // If this message is "temporary", it was dirty at the time its location // was fixed, and therefore was placed into an area allocated on the fly. // Unfix is only called if an error has occurred which requires the // new message to be tossed, so we must free any storage allocated to // this message by a previous "fixMessageLocation". // if (msgTemporary) free(msgStart); } DtMail::BodyPart * RFCMessage::bodyPart(DtMailEnv & error, const int slot) { error.clear(); MutexLock lock_scope(_obj_mutex); return(_bp_cache[slot]->body); } int RFCMessage::lookupByBody(DtMail::BodyPart * bp) { for (int slot = 0; slot < _bp_cache.length(); slot++) { if (_bp_cache[slot]->body == bp) { return(slot); } } return(-1); } inline const char * backcrlf(const char * pos) { const char * back = pos; if (*back == '\n') { back -= 1; } if (*back == '\r') { back -= 1; } return(back); } const char * RFCMessage::parseMsg(DtMailEnv & error, const char * end_of_file) { // We need to find the end of the envelope. This is signified by a // single blank line. This should simply be "\n\n" in the message // but RFC says parsers should be forgiving so we will tolerate // "\n\n" // const char *hdr_end; for (hdr_end = _msg_start; hdr_end <= end_of_file; hdr_end++) { if (*hdr_end == '\n') { int blanks_only = 1; for (const char * blanks = hdr_end + 1; *blanks != '\n' && blanks <= end_of_file; blanks++) { if (!isspace((unsigned char)*blanks)) { blanks_only = 0; break; } } if (blanks_only) { // Found the end of the headers. break; } } } if (hdr_end > end_of_file) { // Ran off the end. error.setError(DTME_NotMailBox); return(end_of_file + 1); } // We need to parse the headers now, because they will give us the // content length, type, and a message id. // _envelope = new RFCEnvelope(error, this, _msg_start, hdr_end - _msg_start + 1); if (error.isSet()) { // Oops! We need to find the next "From " line if possible to at least // let the rest of the parsing proceed. // const char *next_from; for (next_from = hdr_end + 1; next_from <= (end_of_file - 6); next_from++) { if (strncmp(next_from, "\nFrom ", 6) == 0) { break; } } const char * new_end; if (next_from > (end_of_file - 6)) { new_end = end_of_file + 1; } else { new_end = next_from + 1; } return(new_end); } // The hdr_end now points at the last newline of the envelope. // We want the body to start after the next newline, but we must // be prepared to stop if the end of the file is seen before the // body start is found, which can happen if the last message in // the inbox has no contents at all. // for (_body_start = hdr_end + 1; _body_start <= end_of_file && *_body_start != '\n'; _body_start++) { continue; } _body_start += 1; if (_body_start > end_of_file) // past the end?? _body_start = end_of_file; // yes: body starts on last byte // Need to know where this message ends. // const char *messageEnd = findMsgEnd(error, end_of_file); // Need to guard against the case where the current body begins // past the "end" of the current message - this can happen if a // message consists of [this_msg_headers\n][\n][next_msg_headers] // where there is a zero length (no) body. // if (_body_start > _msg_end) // Does body start after last byte in message? _body_start = _msg_end; // Yes: force body to start at last byte of message return(messageEnd); } // returns -> last byte of current message // const char * RFCMessage::findMsgEnd(DtMailEnv & error, const char * eof) { // This function sets "_msg_end" to ( "_body_start" - 1 + length_of_message ). // This function returns a pointer to the beginning of the next envelope "From " or (eof+1). // See if we have a content length. If so, then will try it first. // long content_length; error.clear(); _msg_end = _body_start; DtMailValueSeq value; _envelope->getHeader(error, "content-length", DTM_FALSE, value); if (error.isNotSet()) { content_length = atol(*(value[0])); // Look forward content_length amount and see if we are at // the end (but not beyond) of file, or have a "\nFrom ". // We must be careful here when using content_length as it *may* // be a really large (bogus) number and the addition of that number // to the start of the body *may* wrap around the top of the address // space, thus resulting in a memory access violation. // const char * next_msg = _body_start + content_length; const int contentLengthFudgeFactor = 30; // "From" header min length if (next_msg < _body_start) // message wrapped address space? next_msg = eof+contentLengthFudgeFactor+1; // yes: downsize it // A special case for content-length = 0. Some folders only // have one new line in this case which will confuse us // unless we do something special. // if (content_length == 0) { next_msg -= 1; if (next_msg <= eof && strncmp(next_msg, "\nFrom ", 6) == 0) { _msg_end = next_msg; return(next_msg + 1); } } if (next_msg <= (eof - 6) && strncmp(next_msg, "\nFrom ", 6) == 0) { // It worked! _msg_end = backcrlf(next_msg); return(next_msg + 1); } // Content length extends beyond the eof! // Here it becomes tricky (see bug 1204026). We do not want to // cause the partial receipt of a very large mail message to // cause other valid mail messages to be "gobbled up" in this // message; therefore, we apply a small fudge factor rule here. // If the current end of message is within X bytes (about the // size of a reasonable "From" header) then we take the current // end to be fore this message; otherwise, err on the side of // bursting this message into multiple smaller ones by doing // the "From" scan dance. // if ( (next_msg > eof) && (next_msg < eof+contentLengthFudgeFactor) ) { _msg_end = eof; return(next_msg); } // We need to deal with extraneous white space at the end of // the file. // if (next_msg <= eof) { const char *white; for (white = next_msg; white <= eof && isspace((unsigned char)*white); white++) { continue; } if (white > eof) { _msg_end = backcrlf(next_msg); return(eof + 1); } // See if we are at a "\nFrom ". If so, consider it a // case of off by one and just accept it. This will save // us from scanning the entire message when the content-length // simply didnt get trailing white space right. // if (strncmp(white - 1, "\nFrom ", 6) == 0) { _msg_end = white - 2; return(white); } } } else { error.clear(); } // If the message was sent with X-Lines, we can use it to help // us find the end of the message. // value.clear(); _envelope->getHeader(error, "x-lines", DTM_FALSE, value); if (error.isNotSet()) { int xlines = (int) atol(*(value[0])); int lcnt = 0; for (_msg_end = _body_start; _msg_end <= eof; _msg_end++) { if (*_msg_end == '\n') { lcnt += 1; if (lcnt == xlines) { break; } } } } if (strncmp(_msg_end, "\nFrom ", 6) != 0) { // Well, looks like we have to do it the costly way. Scan the // folder until we hit the end of file, or we hit a "From " at // the start of a line. // for (_msg_end = _body_start - 1; _msg_end <= (eof - 6); _msg_end++) { if (strncmp(_msg_end, "\nFrom ", 6) == 0) { break; } } } // The end is either the eof, or the from line. // const char * real_end = _msg_end + 1; if (_msg_end > (eof - 6)) { real_end = eof + 1; _msg_end = eof; } else { // Again, protect against NULL mesages with 1 blank line // before next message. Size was < 0 and crashed in "memcpy". if ( _msg_end >= _body_start ) _msg_end = backcrlf(_msg_end); } // Let's put a content length on this thing so we won't have to go // through this silliness again! // content_length = _msg_end - _body_start + 1; content_length = content_length < 0 ? 0 : content_length; char buf[20]; sprintf(buf, "%lu", content_length); _envelope->setHeader(error, "Content-Length", DTM_TRUE, buf); return(real_end); } // This is a list of all of the Sun V3 specific headers // static const char *SCANLIST[] = { "x-sun-charset", "x-sun-content-length", "x-sun-content-lines", "x-sun-data-description", "x-sun-data-file", "x-sun-data-name", "x-sun-data-type", "x-sun-encoding-info", "x-sun-reference-file", "x-sun-text-type", (const char *)0}; void RFCMessage::parseBodies(DtMailEnv & error) { error.clear(); // First we need to determine which type of message bodies we // have. There are actually 3 possibilities: single body part, // Sun V3, or MIME. The main difference in the last 2 is the // headers and delimiters used. // DtMailValueSeq value; _envelope->getHeader(error, "Mime-Version", DTM_FALSE, value); if (error.isNotSet()) { parseMIMEBodies(error); } else { error.clear(); // We need to try a common MIME header to see if maybe the // message is just poorly formed. We will examine the content // type to see if smells like a MIME type. // value.clear(); _envelope->getHeader(error, "Content-Type", DTM_FALSE, value); if (error.isSet()) { // No content-type or Mime-Version header: treat as V3 // error.clear(); parseV3Bodies(error); } else { const char * content_type = *(value[0]); // Handle "Content-Type: text" problem with /usr/lib/mail.local: // Apparently /usr/lib/mail.local will insert a "Content-Type: text" // header into received mail if no other Content-Type field is present. // The format of this header is consistent with RFC 1049 (March 1988) // which has since been superceeded by the MIME RFC 1521. // if (strpbrk(content_type, "/;")) { // sub-types identify this as MIME formatted e-mail // parseMIMEBodies(error); } else if (strcasecmp(content_type, "text")==0) { // Problem time - no Mime-Version is present yet there is a // content-type: text which means it could either be a Sun V3 OR // a very poorly formatted MIME message: see if any V3 message // headers are found and if so treat as V3 else treat as MIME // const char **cp; for (cp = SCANLIST; *cp; cp++) { _envelope->getHeader(error, *cp, DTM_FALSE, value); if (error.isNotSet()) break; error.clear(); } if (*cp) { // Found v3 header - treat as such // parseV3Bodies(error); } else { // No v3 headers found - treat as MIME // parseMIMEBodies(error); } } else { parseV3Bodies(error); } } } return; } void RFCMessage::parseMIMEBodies(DtMailEnv & error) { error.clear(); // MIME messages have many different forms, all of which we have to // take into account when parsing the bodies. If there is *no* MIME // Content-Type field, then we treat the entire body of the message // as a single body part of Content-Type: text/plain. // DtMailValueSeq value; _envelope->getHeader(error, "Content-Type", DTM_FALSE, value); if (error.isSet()) { parseMIMETextPlain(error); return; } // We have a MIME Content-Type field - handle the field appropriately const char * content_type = *(value[0]); if (strncasecmp(content_type, "multipart/", 10) == 0) parseMIMEMultipartSubtype(error, content_type+9); else if (strncasecmp(content_type, "message/", 8) == 0) parseMIMEMessageSubtype(error, content_type+7); else // Unknown MIME Content-Type field - handle as text/plain. parseMIMETextPlain(error); return; } // parseMIMEMultipartSubtype -- parse a MIME Multipart content-type // Arguments: // error -- // subtype -- -> Multipart subtype string (begining with "/") // Outputs: // Handle the parsing of the MIME Multipart content-type message according // to the Multipart sub-type. // Returns: // <> // Description: // Call this function to handle the parsing of a MIME Multipart subtype. // void RFCMessage::parseMIMEMultipartSubtype(DtMailEnv & error, const char * subtype) { // Have a multipart message to digest - determine type of multipart message // const char * boundary = (const char *) extractBoundary(subtype); if (!boundary) { // odd--there is no boundary in the multipart/ specification? // This is specifically required as per RFC 1521: // The only mandatory parameter for the multipart Content-Type is // the boundary parameter. // Treat as one big unbounded message message. // BodyPartCache * bpc = new BodyPartCache; bpc->body_start = _body_start; bpc->body = new MIMEBodyPart(error, this, _body_start, _msg_end - _body_start + 1, (RFCEnvelope *)_envelope); _bp_cache.append(bpc); return; } if (strncasecmp(subtype, "/alternative", 12)==0) parseMIMEMultipartAlternative(error, boundary); else if (strncasecmp(subtype, "/mixed", 6)==0) parseMIMEMultipartMixed(error, boundary); else // unknown types handled as Multipart/mixed parseMIMEMultipartMixed(error, boundary); assert(boundary != NULL); free((void *)boundary); return; } // parseMIMEMessageExternalBody -- parse a MIME Message/External-body message // Arguments: // error -- // Outputs: // Process a MIME Message/External-body message // Returns: // <> // Description: // Call this function to handle a MIME Message/External-body message // void RFCMessage::parseMIMEMessageExternalBody(DtMailEnv & error) { // Have a Message/External-Body message to digest - real dumb here, // include entire message including the headers as a single text/plain // BodyPartCache * bpc = new BodyPartCache; bpc->body_start = _msg_start; bpc->body = new MIMEBodyPart(error, this, _msg_start, _msg_end - _msg_start + 1, (RFCEnvelope *)_envelope); _bp_cache.append(bpc); return; } // parseMIMEMessageSubtype -- parse a MIME Message content-type // Arguments: // error -- // subtype -- -> Message subtype string (begining with "/") // Outputs: // Handle the parsing of the MIME Message content-type message according // to the Message sub-type. // Returns: // <> // Description: // Call this function to handle the parsing of a MIME Message subtype. // void RFCMessage::parseMIMEMessageSubtype(DtMailEnv & error, const char * subtype) { // Have a Message message to digest - determine type of Message message // if (strncasecmp(subtype, "/external-body", 14) == 0) parseMIMEMessageExternalBody(error); else parseMIMETextPlain(error); return; } // parseMIMETextPlain -- parse a MIME Text/Plain message // Arguments: // error -- // Outputs: // Construct single body part cache component encompassing the entire body // of the message as defined by _body_start for (_msg_end-_body_start)+1 bytes // Returns: // <> // Description: // Call this function to take the current message body and treat it as // a single message of type text/plain. // void RFCMessage::parseMIMETextPlain(DtMailEnv & error) { error.clear(); // This is a single body part of text/plain. // BodyPartCache * bpc = new BodyPartCache; bpc->body_start = _body_start; bpc->body = new MIMEBodyPart(error, this, _body_start, _msg_end - _body_start + 1, (RFCEnvelope *)_envelope); _bp_cache.append(bpc); return; } // parseMIMEMultipartAlternative -- parse a MIME multipart/alternative message // Arguments: // error -- // boundary -- interpart boundary // Outputs: // updated message pointers, constructed body part cache, and // constructed alternative message cache // Returns: // <> // Description: // Given a multipart alternative MIME message, parse the message, creating // a body part cache with an entry containing each body part, and ???GMG??? // // Here is how a multipart alternative message will be parsed into memory, // where alternatives 1 and 2 can be displayed / processed on this system, // but alternative 3 can not. // // +--------------------+ -> A // | headers | // +--------------------+ -> B // | interpart boundary | // +--------------------+ -> C // | alternative 1 | // +--------------------+ -> D // | interpart boundary | // +--------------------+ -> E // | alternative 2 | // +--------------------+ -> F // | interpart boundary | // +--------------------+ -> G // | alternative 3 | // +--------------------+ -> H // _alternativeMultipart == DTM_TRUE // _alternativeMessage == DTM_FALSE // _alternativeValid == DTM_FALSE // _bp_cache = contents of alternative 2's bp_cache (because 1-2 valid, 3 not) // _alt_msg_cache = entries for alternative 1, alternative 2, alternative 3 // // +----------------------+ -> C // | headers | // +----------------------+ // | multipart boundary | // +----------------------+ // | alternative 1 part a | // +----------------------+ // | multipart boundary | // +----------------------+ // | alternative 1 part b | // +----------------------+ -> D // _alternativeMultipart == DTM_FALSE // _alternativeMessage == DTM_TRUE // _alternativeValid == DTM_TRUE [this sample case can display this alt.] // _bp_cache = entries for part a and part b // _alt_msg_cache = empty // // +----------------------+ -> E // | headers | // +----------------------+ // | multipart boundary | // +----------------------+ // | alternative 2 part a | // +----------------------+ // | multipart boundary | // +----------------------+ // | alternative 2 part b | // +----------------------+ -> F // _alternativeMultipart == DTM_FALSE // _alternativeMessage == DTM_TRUE // _alternativeValid == DTM_TRUE [this sample case can display this alt.] // _bp_cache = entries for part a and part b // _alt_msg_cache = empty // // +----------------------+ -> G // | headers | // +----------------------+ // | multipart boundary | // +----------------------+ // | alternative 3 part a | // +----------------------+ // | multipart boundary | // +----------------------+ // | alternative 3 part b | // +----------------------+ -> H // _alternativeMultipart == DTM_FALSE // _alternativeMessage == DTM_TRUE // _alternativeValid == DTM_FALSE [this sample case can NOT display this alt.] // _bp_cache = entries for part a and part b // _alt_msg_cache = empty // void RFCMessage::parseMIMEMultipartAlternative(DtMailEnv & error, const char * boundary) { assert(_alternativeMessage == DTM_FALSE); assert(boundary != NULL); // Chew through anything that appears before the first boundary. // const char * body = _body_start; const char * body_end = _msg_end; int bndry_len = strlen(boundary); for (; body <= _msg_end; body++) { if (*body == '-' && *(body + 1) == '-' && strncmp(body + 2, boundary, bndry_len) == 0) { break; } } if (body > _msg_end || *(body + bndry_len + 2) == '-') { // No boundaries. Treat as one big message. // BodyPartCache * bpc = new BodyPartCache; bpc->body_start = _body_start; bpc->body = new MIMEBodyPart(error, this, _body_start, _msg_end - _body_start + 1, (RFCEnvelope *)_envelope); _bp_cache.append(bpc); return; } // We are sitting at the beginning of the first boundary // Construct an alternative message cache for the various alternative bodies // On input to this loop, // body -> first byte of body part including boundary // body_end -> end of message (_msg_end) // On each iteration through the loop, // - MIMEBodyPart is called with a body boundary of [body]...[body_end] // - MIMEBodyPart alters body_end to be the last byte of the message, // containing the boundary *only if* it is the last body part // do { AlternativeMessageCache * amc = new AlternativeMessageCache; amc->amc_body_start = body; amc->amc_body_end = body_end; amc->amc_body = new MIMEBodyPart(error, this, body, &amc->amc_body_end, boundary); body = amc->amc_body_end; if ( (amc->amc_body_end == (body_end+1)) && (strncmp(amc->amc_body_end-(bndry_len+3), boundary, bndry_len) == 0) ){ amc->amc_body_end -= bndry_len+5; } amc->amc_msg = new RFCMessage(error, amc->amc_body_start, amc->amc_body_end); _alt_msg_cache.append(amc); } while (body <= _msg_end); // Now we *should* go through the entire alternative message cache validating // each message to see if it can be displayed on this system. This is a task // left for a future exercise. For now, use the first alternative always. // AlternativeMessageCache *amc = _alt_msg_cache[_alt_msg_cache.length()-1]; int slotMax = amc->amc_msg->_bp_cache.length(); for (int slot = 0; slot < slotMax; slot++) { BodyPartCache * p_bpc = new BodyPartCache; BodyPartCache * c_bpc = amc->amc_msg->_bp_cache[slot]; p_bpc->body = c_bpc->body; p_bpc->body_start = c_bpc->body_start; _bp_cache.append(p_bpc); } _alternativeMultipart = DTM_TRUE; // main msg of multipart alternative } // parseMIMEMultipartMixed -- parse a MIME multipart/mixed message // Arguments: // error -- // boundary -- bodypart boundary // Outputs: // updated message pointers and constructed body part cache // Returns: // <> // Description: // Given a multipart mixed MIME message, parse the message, creating a body // part cache with an entry containing each body part. // void RFCMessage::parseMIMEMultipartMixed(DtMailEnv & error, const char * boundary) { assert(boundary != NULL); // Chew through anything that appears before the first boundary. // const char * body = _body_start; const char * body_end = _msg_end; int bndry_len = strlen(boundary); for (; body <= _msg_end; body++) { if (*body == '-' && *(body + 1) == '-' && strncmp(body + 2, boundary, bndry_len) == 0) { break; } } if (body > _msg_end || *(body + bndry_len + 2) == '-') { // No boundaries. Treat as one big message. // BodyPartCache * bpc = new BodyPartCache; bpc->body_start = _body_start; bpc->body = new MIMEBodyPart(error, this, _body_start, _msg_end - _body_start + 1, (RFCEnvelope *)_envelope); _bp_cache.append(bpc); return; } // We are sitting at the beginning of the first boundary // Construct a body part cache from the various body parts // On input to this loop, // body -> first byte of body part (including boundary) // body_end -> end of message (_msg_end) // On each iteration through the loop, // - MIMEBodyPart is called with a body boundary of [body]...[body_end] // - MIMEBodyPart alters body_end to be the last byte of the message, // containing the boundary *only if* it is the last body part // do { BodyPartCache * bpc = new BodyPartCache; bpc->body_start = body; bpc->body = new MIMEBodyPart(error, this, body, &body_end, boundary); _bp_cache.append(bpc); body = body_end; body_end = _msg_end; } while (body <= _msg_end); return; } void RFCMessage::parseV3Bodies(DtMailEnv & error) { error.clear(); // We have 3 choices here. We may have no content-type field, // in which case it is a simple RFC822 message. We handle those // in the V3 body code because this is really the legacy branch. // // We can have a content-type = text which is a single body part // of either 7 bit text, or 8 bit unencoded text. // // Finally we could have a Sun V3 multipart document. // DtMailValueSeq value; _envelope->getHeader(error, "Content-Type", DTM_FALSE, value); if (error.isSet()) { // Pretty simple. Pass the entire body and the envelope to // the V3 body constructor. // error.clear(); BodyPartCache * bpc = new BodyPartCache; bpc->body_start = _body_start; bpc->body = new V3BodyPart(error, this, _body_start, _msg_end - _body_start + 1, (RFCEnvelope *)_envelope); _bp_cache.append(bpc); return; } const char * content_type = *(value[0]); // If the type is text, or, if it isn't an attachment type we // understand treat the body as a single part. // if (strcasecmp(content_type, "text") == 0 || strcasecmp(content_type, "x-sun-attachment") != 0) { BodyPartCache * bpc = new BodyPartCache; bpc->body_start = _body_start; bpc->body = new V3BodyPart(error, this, _body_start, _msg_end - _body_start + 1, (RFCEnvelope *)_envelope); _bp_cache.append(bpc); } else { // We need to scan for each of the message boundaries and // let the body part object parse the important stuff. // // V3 bodies start with a sequence of 10 dashes. After that, // there *should* be a newline. After such a sequence, // the body part constructor will give us the body end. // const char *body; for (body = _body_start - 1; body <= _msg_end; body++) { if (*body == '\n' && strncmp(body + 1, "----------", 10) == 0 && (*(body + 11) == '\n' || *(body + 11) == '\r' && *(body + 12) == '\n')) { break; } } if (body > _msg_end) { // Well, we have some kind of inconsistency here. Let's // treat it as one big body part so we can display something. // BodyPartCache * bpc = new BodyPartCache; bpc->body_start = _body_start; bpc->body = new V3BodyPart(error, this, _body_start, _msg_end - _body_start + 1, (RFCEnvelope *)_envelope); _bp_cache.append(bpc); return; } body += 1; const char * body_end = _msg_end; do { BodyPartCache * bpc = new BodyPartCache; bpc->body_start = body; bpc->body = new V3BodyPart(error, this, body, &body_end); _bp_cache.append(bpc); body = body_end; body_end = _msg_end; } while (body <= _msg_end); } return; } int RFCMessage::sizeMIMEBodies(DtMailEnv &) { // We will use _msg_end - _body_start because this is // both the correct body length, and the one we updated. // return(_msg_end - _body_start + 1); } int RFCMessage::sizeV3Bodies(DtMailEnv &) { // We will use _msg_end - _body_start because this is // both the correct body length, and the one we updated. // return(_msg_end - _body_start + 1); } char * RFCMessage::extractBoundary(const char * content_type) { const char * boundary; if (!content_type) { char * new_bdry = (char *)malloc(78); sprintf(new_bdry, "%p-%08lx-%p", new_bdry, (long)time(NULL), &new_bdry); } // We will need the boundary to find the message boundaries. // for (boundary = content_type; *boundary; boundary++) { if (strncasecmp(boundary, "boundary=", 9) == 0) { break; } } if (!*boundary) { return(NULL); } // Get past uninteresting cruft. // boundary += 9; int quoted = 0; if (*boundary == '"') { boundary += 1; quoted = 1; } // Find the end of the boundary. This will be either the end of // the line, a quote, or a semi-colon. // const char *b_end; for (b_end = boundary; *b_end; b_end++) { if (quoted) { if (*b_end == '"') { break; } } else { if (*b_end == ';' || *b_end == '\r' || *b_end == '\n') { break; } } } int len = b_end - boundary + 1; char * result = (char *)malloc(len); strncpy(result, boundary, len - 1); result[len - 1] = 0; return(result); } DtMailBoolean RFCMessage::hasHeaders(const char * buf, const unsigned long len) { const char * start = buf; if (len > 5 && strncmp(buf, "From ", 5) == 0) { // Get past this line. Unix from line. It doesnt count as // a header for our purposes. // while (start < (buf + len) && *start != '\n') { start++; } start += 1; if (start >= (buf + len)) { return(DTM_FALSE); } } // At this point we should have something that looks like a header. // This will be a string with no spaces, terminated with a colon, // followed by some text. // const char *colon; for (colon = start; colon < (buf + len) && *colon != ':'; colon++) { continue; } if (colon >= (buf + len) || *colon != ':') { return(DTM_FALSE); } // RFC822 states: // Once a field has been unfolded, it may be viewed as being com- // posed of a field-name followed by a colon (":"), followed by a // field-body, and terminated by a carriage-return/line-feed. // The field-name must be composed of printable ASCII characters // (i.e., characters that have values between 33. and 126., // decimal, except colon). // // We should only find characters between 33 and 126 from the start // to the colon. Any exception means this is not a valid field-name, // and therefore, not a valid RFC822 header. // for (const char * check = start; check < colon; check++) { if (*check < 33 || *check > 126) { return(DTM_FALSE); } } // We passed the criteria, so this must be a header. // return(DTM_TRUE); }