2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these librararies and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
27 * $TOG: RFCMessage.C /main/9 1998/04/06 13:29:56 mgreess $
29 * RESTRICTED CONFIDENTIAL INFORMATION:
31 * The information in this document is subject to special
32 * restrictions in a confidential disclosure agreement bertween
33 * HP, IBM, Sun, USL, SCO and Univel. Do not distribute this
34 * document outside HP, IBM, Sun, USL, SCO, or Univel wihtout
35 * Sun's specific written approval. This documment and all copies
36 * and derivative works thereof must be returned or destroyed at
39 * Copyright 1993 Sun Microsystems, Inc. All rights reserved.
44 #include <EUSCompat.h>
50 #include <DtMail/DtMail.hh>
52 #include <DtMail/Threads.hh>
53 #include "str_utils.h"
// Name of the header that records when a message was marked delete-pending.
// It is written by setFlag(), removed by resetFlag() and read back by
// flagIsSet() and getDeleteTime().
55 const char * RFCDeleteHeader = "X-Dt-Delete-Time";
// Marker value stored in _object_signature by the constructors; the
// destructor tests for it and clears it before tearing the object down.
56 static unsigned long RFCMessageSignature = 0xff83421e;
58 // RFCMessage constructor for alternative message parts
61 // alt_start -- beginning of alternative message (at interpart boundary)
62 // alt_end -- last byte of alternative message
64 // RFCMessage * (with _alternativeMessage == DTM_TRUE) that has a completely
65 // parsed and setup message with body parts and minimal envelope.
67 // This constructor is used to create a message that is one of possibly
68 // many alternative messages that are part of a single MIME multipart
69 // alternative message.
// The base class is constructed with a NULL parent, and _alt_msg_cache is
// constructed with 0 here (the other constructors pass 8).
71 RFCMessage::RFCMessage(DtMailEnv & error,
72 const char * alt_start,
74 : DtMail::Message(error, NULL), _bp_cache(8), _alt_msg_cache(0)
78 // Initialize objects within RFCMessage structure
80 _object_signature = RFCMessageSignature;
81 _dirty = 0; // can never be dirty
82 _alternativeMultipart = DTM_FALSE; // msg does not contain alternatives
83 _alternativeMessage = DTM_TRUE; // msg is an alternative message
84 _alternativeValid = DTM_FALSE; // msg is not valid (maybe later)
85 _msg_start = alt_start;
90 // parse the message, creating an envelope to encompass the first headers
91 // found and setting up the body part boundaries
// The pointer returned by parseMsg is deliberately discarded here.
93 (void) parseMsg(error, (char *) alt_end);
98 // parse the message bodies - this constructor is currently only called
99 // for a MIME multipart alternative message part. If other mail protocols
100 // are added that have similar functionality, this constructor will have
101 // to be augmented to take this into consideration
// NOTE(review): no test of `error` is visible between parseMsg and
// parseMIMEBodies -- confirm a failed header parse cannot reach this call.
103 parseMIMEBodies(error);
// RFCMessage constructor for a message contained in a mailbox.
// Only the envelope is parsed here (via parseMsg); the pointer parseMsg
// returns is stored through *start, and body parsing is deferred as the
// comment further down explains.
112 RFCMessage::RFCMessage(DtMailEnv & error, DtMail::MailBox * parent,
114 const char * end_of_file)
115 : DtMail::Message(error, parent), _bp_cache(8), _alt_msg_cache(8)
117 _object_signature = RFCMessageSignature;
// NOTE(review): _parent is dereferenced without a visible NULL check --
// confirm callers always supply a mailbox.
121 _session = _parent->session();
125 _alternativeMultipart = DTM_FALSE;
126 _alternativeMessage = DTM_FALSE;
127 _alternativeValid = DTM_FALSE;
131 *start = parseMsg(error, end_of_file);
133 // We want to be lazy about parsing the body. Most messages are
134 // never viewed in a folder. We have already hit the pages once,
135 // but with MADV_SEQUENTIAL on, they are marked for immediate
136 // disposal so they may be gone. Of course we are going to slow
137 // the first display of the message, but odds are that will never
140 // There is another reason for delaying the parsing on an MT hot
141 // platform. We are parsing on a separate thread. If we wait until
142 // we are asked for information about the bodies to parse them,
143 // we can get the main thread to do this work. So, we can actually
144 // get some concurrent parsing to occur.
146 // Conclusion: Lazy is better!
// RFCMessage constructor that builds a message from an object in a given
// object space. In the visible code only DtMailBufferObject is handled:
// a non-empty buffer must pass hasHeaders() and is then parsed in place,
// while an empty buffer yields an empty message with a fresh envelope.
// DtMailFileObject ends in DTME_NotSupported.
151 RFCMessage::RFCMessage(DtMailEnv & error,
152 DtMail::Session * session,
153 DtMailObjectSpace space,
157 : DtMail::Message(error, NULL), _bp_cache(8), _alt_msg_cache(8)
159 // We do different processing, depending on the space for the
165 case DtMailBufferObject:
167 _object_signature = RFCMessageSignature;
171 _alternativeMultipart = DTM_FALSE;
172 _alternativeMessage = DTM_FALSE;
173 _alternativeValid = DTM_FALSE;
// NOTE(review): arg is cast and dereferenced below with no visible NULL
// check -- confirm callers never pass a NULL buffer descriptor.
175 _msg_buf = (DtMailBuffer *)arg;
177 // If the buffer is not null, then we need to parse the
180 if (_msg_buf->size > 0) {
181 if (hasHeaders((const char *)_msg_buf->buffer, _msg_buf->size) == DTM_FALSE) {
182 error.setError(DTME_UnknownFormat);
186 _msg_start = (char *)_msg_buf->buffer;
// The end pointer handed to parseMsg is buffer + size, i.e. one past the
// last byte of the buffer.
187 (void) parseMsg(error, (char *)_msg_buf->buffer + _msg_buf->size);
193 // We need to construct an empty message.
195 _msg_start = _msg_end = _body_start = NULL;
196 _envelope = new RFCEnvelope(error, this, NULL, 0);
201 case DtMailFileObject:
203 error.setError(DTME_NotSupported);
// Destructor. Teardown only runs while _object_signature still holds
// RFCMessageSignature; the signature is cleared first, under _obj_mutex.
207 RFCMessage::~RFCMessage(void)
209 if (_object_signature == RFCMessageSignature) {
210 MutexLock lock_scope(_obj_mutex);
211 _object_signature = 0;
213 // spin through the body part cache deleting all parts cached.
214 // If _alternativeMultipart is set, this is the "main message" and
215 // as such all of the body parts are merely copied from one of the
216 // underlying alternative messages, so in this case we only delete the
217 // body part cache and not the contents of the cache, as that will be
218 // taken care of when the alternative message cache is purged.
220 while (_bp_cache.length()) {
221 BodyPartCache * bpc = _bp_cache[0];
222 if (!_alternativeMultipart)
228 // spin through the alternative message cache destroying any
229 // alternative messages which have been built to support a
230 // multipart alternative message
232 if (_alternativeMultipart == DTM_TRUE)
233 while (_alt_msg_cache.length()) {
234 AlternativeMessageCache * amc = _alt_msg_cache[0];
// NOTE(review): the non-NULL requirements below are enforced only by
// assert, which is compiled out when NDEBUG is defined.
236 assert(amc->amc_msg != NULL);
238 assert(amc->amc_body != NULL);
239 delete amc->amc_body;
241 _alt_msg_cache.remove(0);
// Copies the raw message bytes [_msg_start, _msg_end] into a newly
// allocated array and returns it through buf (buf.buffer / buf.size).
// The array is allocated with new char[].
247 RFCMessage::toBuffer(DtMailEnv & error, DtMailBuffer & buf)
// NOTE(review): for an empty message (_msg_start == _msg_end == NULL, as
// set by the buffer constructor) this yields size 1 and a copy from NULL
// unless an elided guard prevents it -- confirm.
251 buf.size = _msg_end - _msg_start + 1;
252 buf.buffer = new char[buf.size];
// NOTE(review): a standard (throwing) operator new[] reports failure with
// std::bad_alloc instead of returning NULL, so this check only takes
// effect on toolchains where new returns NULL on failure.
253 if (buf.buffer == NULL) {
254 error.setError(DTME_NoMemory);
258 memcpy(buf.buffer, _msg_start, (size_t) buf.size);
// Returns the implementation ID string for this message: the parent
// container's impl() string, or the literal "Internet MIME" otherwise.
// The condition choosing between the two is elided in this view;
// presumably it tests whether _parent is set.
262 RFCMessage::impl(DtMailEnv & error)
264 // By definition, we are contained within some RFC container. We
265 // want to make sure we return the exact same string, so just return
266 // what our containing parent uses for its impl ID string.
269 return(_parent->impl(error));
272 return("Internet MIME");
// Envelope accessor. The body is elided in this view; presumably it
// returns _envelope -- TODO confirm.
276 RFCMessage::getEnvelope(DtMailEnv & error)
// Returns the number of entries in the body part cache. An empty cache
// triggers the lazy-parse branch first (its statement is elided here;
// presumably a call to parseBodies).
283 RFCMessage::getBodyCount(DtMailEnv & error)
287 // We may need to parse the body!
289 if (_bp_cache.length() == 0) {
293 return(_bp_cache.length());
// Returns the body part in cache slot 0. The lazy-parse branch is taken
// only when the cache is empty and the message has data (_msg_start is
// non-NULL). If the cache is still empty afterwards, the second branch
// handles it (its statement is elided here; presumably it returns NULL).
297 RFCMessage::getFirstBodyPart(DtMailEnv & error)
301 // We may need to parse the body!
303 if (_bp_cache.length() == 0 && _msg_start) {
307 if (_bp_cache.length() == 0) {
311 return(bodyPart(error, 0));
// Returns the body part that follows `last` in the body part cache.
// `last` is located with lookupByBody(); the guard below rejects an
// unknown part (negative slot) and the final cached part.
315 RFCMessage::getNextBodyPart(DtMailEnv & error, DtMail::BodyPart * last)
319 int slot = lookupByBody(last);
320 if (slot < 0 || (slot + 1) >= _bp_cache.length()) {
// NOTE(review): the lines between the guard and this return are elided;
// presumably slot is advanced by one before it is used here -- confirm.
325 return(bodyPart(error, slot));
// Creates a new MIMEBodyPart with its own BodyPartCache entry (body_start
// set to NULL) and adds it to the body part cache: either appended at the
// end or inserted at slot + 1, where slot is the position of `after` as
// found by lookupByBody(). The condition choosing between append and
// insert is elided in this view.
329 RFCMessage::newBodyPart(DtMailEnv & error, DtMail::BodyPart * after)
336 slot = lookupByBody(after);
339 RFCBodyPart * bp = new MIMEBodyPart(error,
345 BodyPartCache * bpc = new BodyPartCache;
347 bpc->body_start = NULL;
350 _bp_cache.append(bpc);
353 _bp_cache.insert(bpc, slot + 1);
// Sets a message state flag by editing envelope headers:
//   DtMailMessageNew           -> "Status" header set to "NR"
//   DtMailMessageDeletePending -> RFCDeleteHeader set to a timestamp
//   DtMailMessagePartial       -> nothing to do
// The remaining visible path sets DTME_OperationInvalid.
360 RFCMessage::setFlag(DtMailEnv & error, const DtMailMessageState flag)
368 case DtMailMessageNew:
369 _envelope->setHeader(error, "Status", DTM_TRUE, "NR");
372 case DtMailMessageDeletePending:
373 // Start the delete time out by setting the X-Dt-Delete-Time header.
// The time is written as 8 upper-case hex digits; getDeleteTime() reads
// it back with strtol(..., 16).
376 sprintf(str_time, "%08lX", (long)now);
377 _envelope->setHeader(error, RFCDeleteHeader, DTM_TRUE, str_time);
380 case DtMailMessagePartial:
382 // There is nothing to do for partial. It is already
383 // marked as partial as it has the Content-Type: message/partial
384 // header. This case statement exists so that SetFlag will not
385 // drop into the default 'error' below.
389 error.setError(DTME_OperationInvalid);
// Clears a message state flag by editing envelope headers:
//   DtMailMessageNew           -> "Status" header set to "RO"
//   DtMailMessageDeletePending -> RFCDeleteHeader removed
//   DtMailMessagePartial       -> nothing to do
// The remaining visible path sets DTME_OperationInvalid.
397 RFCMessage::resetFlag(DtMailEnv & error, const DtMailMessageState flag)
402 case DtMailMessageNew:
403 _envelope->setHeader(error, "Status", DTM_TRUE, "RO");
406 case DtMailMessageDeletePending:
407 _envelope->removeHeader(error, RFCDeleteHeader);
410 case DtMailMessagePartial:
412 // There is nothing to do for partial. It is already
413 // marked as partial as it has the Content-Type: message/partial
414 // header. This case statement exists so that resetFlag will not
415 // drop into the default 'error' below.
417 // You can not 'reset' message partial, you can only
418 // delete the header.
422 error.setError(DTME_OperationInvalid);
// Reports whether a message state flag is set, judging only from envelope
// headers. `answer` starts as DTM_FALSE; the assignments inside each
// branch are elided in this view.
429 RFCMessage::flagIsSet(DtMailEnv & error, const DtMailMessageState flag)
431 DtMailValueSeq value;
432 DtMailBoolean answer = DTM_FALSE;
435 case DtMailMessageNew:
436 _envelope->getHeader(error, "Status", DTM_FALSE, value);
437 if (error.isNotSet()) {
438 const char * status = *(value[0]);
// strcasecmp is non-zero when the Status value differs from "ro"
// (case-insensitively), so this branch covers every other Status value.
439 if (strcasecmp(status, "ro")) {
444 // No Status: means a new message.
451 case DtMailMessageDeletePending:
// Only the presence of the delete header is tested in the visible code.
452 _envelope->getHeader(error, RFCDeleteHeader, DTM_FALSE, value);
453 if (error.isNotSet()) {
461 case DtMailMessageMultipart:
462 _envelope->getHeader(error, "Content-Type", DTM_FALSE, value);
463 if (error.isNotSet()) {
464 const char * type = *(value[0]);
// "X-Sun-Attachment" must match the whole value; "multipart" only needs
// to match the first 9 characters.
465 if (strcasecmp(type, "X-Sun-Attachment") == 0 ||
466 strncasecmp(type, "multipart", 9) == 0) {
475 case DtMailMessagePartial:
476 _envelope->getHeader(error, "Content-Type", DTM_FALSE, value);
477 if (error.isNotSet()) {
478 const char * type = *(value[0]);
479 if (strncasecmp(type, "message/partial", 15) == 0) {
// Reads the delete timestamp recorded in RFCDeleteHeader. The header
// value is parsed as hexadecimal, matching the "%08lX" format written by
// setFlag(); delete_time keeps its initial 0 when the header lookup fails.
497 RFCMessage::getDeleteTime(DtMailEnv & error)
499 time_t delete_time = 0;
501 DtMailValueSeq value;
502 _envelope->getHeader(error, RFCDeleteHeader, DTM_FALSE, value);
503 if (error.isNotSet()) {
504 delete_time = (time_t) strtol(*(value[0]), NULL, 16);
// Forwards the dirty delta to the parent mailbox. _parent is cast to
// RFCMailBox without a type check. Any guard around this call, and any
// update of this message's own dirty state, is elided in this view.
513 RFCMessage::markDirty(const int delta)
516 ((RFCMailBox *)_parent)->markDirty(delta);
525 // Function: RFCMessage::fixMessageLocation - return address/length of message
527 // This function is used to obtain an address and length in bytes of a
530 // If the message is not dirty (e.g. already mapped into memory as one
531 // contiguous stream of bytes) then simply return its address and length;
532 // otherwise, must construct the message into a temporary area, the address
533 // and length of which is then returned.
535 // long & msgLen -- (output) length of message in bytes
536 // int & msgTemporary -- (output) 0 if message resides in previously
537 // allocated memory (e.g. mapped already),
538 // != 0 if temporary space allocated to hold
539 // message, requiring later deallocation
541 // const char * -> first byte of message
// NOTE(review): the parameter list above predates the current signature.
// The function now reports header and body separately (msgHeaderStart /
// msgHeaderLen and msgBodyStart / msgBodyLen) and also outputs
// msgBodyOffset: 0 for a clean message, otherwise the new header length
// minus the original header length (_body_start - _msg_start).
544 RFCMessage::fixMessageLocation(char ** msgHeaderStart, long & msgHeaderLen,
545 char ** msgBodyStart, long &msgBodyLen,
546 int & msgTemporary, long & msgBodyOffset)
549 // This is easy as the message is complete as is, simply return
550 // its current memory location and size in bytes. The message is
551 // "permanent" in that it does not currently reside in a temporary
552 // memory address which later has to be deallocated.
554 *msgHeaderStart = (char *)_msg_start;
555 msgHeaderLen = _body_start - _msg_start;
556 *msgBodyStart = (char *)_body_start;
557 msgBodyLen = (_msg_end - _body_start)+1;
559 msgBodyOffset = 0; // no header offset fudge factor
563 // Well, now it gets tricky. The message is not assembled in one
564 // place in memory, which prevents fixing an address and length
565 // for it, so we have to force the message to be constructed. This
566 // is done in a "temporary" memory area which has to be deallocated
567 // when the message is mapped in from a real file.
569 pinMessageDown(msgHeaderStart, msgHeaderLen, msgBodyStart, msgBodyLen);
570 msgBodyOffset = msgHeaderLen-(_body_start-_msg_start);
575 // Function: RFCMessage::pinMessageDown - fix complete message in memory
577 // This function is used on a "dirty" message to cause a complete
578 // in memory copy of the message to be created so that it can be
579 // written out (probably as a result of a mailbox save operation).
582 // long & msgLen -- (output) length of message in bytes
583 // long & msgBodyOffset -- (output) +- adjustment (between original message
584 // and new message) of the first byte at which the
585 // body of the message begins -- this lets us determine
586 // the body start in cases where the headers are changed
587 // without reparsing the message
589 // const char * -> first byte of message pinned in memory
// NOTE(review): the parameter list above does not match the signature
// below, which has no msgBodyOffset; that value is computed by
// fixMessageLocation. In the visible code only the headers are rewritten
// into malloc'ed storage; the body pointer and length still refer to the
// original message bytes.
592 RFCMessage::pinMessageDown(char ** msgHeaderStart, long & msgHeaderLen,
593 char ** msgBodyStart, long &msgBodyLen)
595 // First, determine which write method to use. We have
596 // 2 choices, Sun V3, and MIME. We will need to compute
597 // the size of the message, then make another pass to
598 // actually store the body.
602 DtMailValueSeq value;
604 // Compute content length of message
606 _envelope->getHeader(error, "Mime-Version", DTM_FALSE, value);
607 if (error.isNotSet()) {
608 content_length = sizeMIMEBodies(error);
612 content_length = sizeV3Bodies(error);
615 content_length = content_length < 0 ? 0 : content_length;
// NOTE(review): "%d" requires content_length to be an int; its
// declaration is elided in this view -- confirm the type.
618 sprintf(len_buf, "%d", content_length);
619 _envelope->setHeader(error, "Content-Length", DTM_TRUE, len_buf);
621 // Allocate storage for the headers and write headers into it
623 const size_t maxHeaderLength =
624 (size_t) ((RFCEnvelope *)_envelope)->headerLength();
625 const size_t fudgeAtEnd = 102; // two extra \n's at end of msg + slop
626 size_t msgNewHeaderSize = (maxHeaderLength+fudgeAtEnd);
627 *msgHeaderStart = (char *)malloc(msgNewHeaderSize);
// NOTE(review): allocation failure is detected only by assert, which is
// compiled out when NDEBUG is defined.
628 assert(*msgHeaderStart != NULL);
630 char * end = ((RFCEnvelope *)_envelope)->writeHeaders(*msgHeaderStart);
633 msgHeaderLen = end-*msgHeaderStart;
635 // Make sure we did not overflow the allocated area
// NOTE(review): this check runs after writeHeaders has already written,
// so it can only report an overflow, not prevent one. It also compares a
// long with a size_t.
637 assert(msgHeaderLen < msgNewHeaderSize);
639 // As of this implementation, body parts can never change, so we do not have to
640 // worry about the body part needing reconstruction
642 *msgBodyStart = (char *)_body_start;
643 msgBodyLen = (_msg_end - _body_start)+1;
// Rebases every pointer held by this message after its bytes have been
// relocated to newStart: _body_start, _msg_end, the envelope, every cached
// body part and, for a multipart/alternative parent, the envelope and body
// parts of each cached alternative. Runs under _obj_mutex.
//   newStart      -- new address of the first byte of the message
//   newLength     -- expected new length; checked only by assert
//   msgTemporary  -- must agree with _dirty; checked only by assert
//   newBodyOffset -- change in header length, added to every rebased
//                    pointer that lies at or after the body start
// oldStart is not used in the visible lines.
649 RFCMessage::adjustMessageLocation(char * oldStart, char * newStart, long newLength, int msgTemporary, long newBodyOffset)
651 MutexLock lock_scope(_obj_mutex);
653 assert((_dirty && msgTemporary) || (!_dirty && !msgTemporary));
655 // At this point the message has been mapped into a new location in memory
656 // Must adjust all pointers to data contained within this message
// NOTE(review): the length is narrowed to int here although newLength is
// a long; a message too large for int would be truncated.
658 int len = (int) ((_msg_end - _msg_start) + 1 + newBodyOffset);
659 assert(len == newLength);
660 _body_start = (_body_start - _msg_start) + newStart + newBodyOffset;
661 _msg_end = newStart + len - 1;
663 ((RFCEnvelope *)_envelope)->adjustHeaderLocation(newStart, (int)(_body_start-newStart));
664 int bpMaxSlot = _bp_cache.length();
// _msg_start still holds the old base address throughout these loops; it
// is only updated after they finish.
666 for (bp = 0; bp < bpMaxSlot; bp++) {
667 char * new_loc = newStart + (_bp_cache[bp]->body_start - _msg_start) + newBodyOffset;
668 _bp_cache[bp]->body->adjustBodyPartsLocation(new_loc);
669 _bp_cache[bp]->body_start = new_loc;
672 // If this is the main portion of a multipart alternative message,
673 // we must handle each alternative and all of the body parts that
674 // comprise each alternative
676 if (_alternativeMultipart == DTM_TRUE) {
677 // Multipart Alternative message parent body
678 // Adjust all of the body parts cached for the alternatives
680 int altCacheMaxSlot = _alt_msg_cache.length();
681 for (int altCacheSlot = 0; altCacheSlot < altCacheMaxSlot; altCacheSlot++) {
682 AlternativeMessageCache *amc = _alt_msg_cache[altCacheSlot];
683 char *altNewStart = (((RFCEnvelope *) amc->amc_msg->_envelope)->headerLocation() - _msg_start) + newStart + newBodyOffset;
684 int altHeaderLength =
685 (int) ((RFCEnvelope *) amc->amc_msg->_envelope)->headerLength();
686 ((RFCEnvelope *) amc->amc_msg->_envelope)->adjustHeaderLocation(altNewStart, altHeaderLength);
687 bpMaxSlot = amc->amc_msg->_bp_cache.length();
688 for (bp = 0; bp < bpMaxSlot; bp++) {
689 char *new_loc = newStart + (amc->amc_msg->_bp_cache[bp]->body_start - _msg_start) + newBodyOffset;
690 amc->amc_msg->_bp_cache[bp]->body->adjustBodyPartsLocation(new_loc);
691 amc->amc_msg->_bp_cache[bp]->body_start = new_loc;
696 _msg_start = newStart;
698 // If this message is "temporary", it was dirty at the time its location
699 // was fixed, and therefore was placed into an area allocated on the fly.
700 // Since the message and all associated pointers no longer refer to this
701 // area, it must be free'd
// Error-path counterpart of fixMessageLocation: releases the temporary
// storage when a fixed message has to be discarded. Runs under
// _obj_mutex. The statement that frees the storage is elided in this view.
711 RFCMessage::unfixMessageLocation(char * msgStart, int msgTemporary)
713 MutexLock lock_scope(_obj_mutex);
715 // If this message is "temporary", it was dirty at the time its location
716 // was fixed, and therefore was placed into an area allocated on the fly.
717 // Unfix is only called if an error has occurred which requires the
718 // new message to be tossed, so we must free any storage allocated to
719 // this message by a previous "fixMessageLocation".
// Returns the body part object stored in cache slot `slot`, read under
// _obj_mutex.
// NOTE(review): no range check on slot is visible -- callers are
// presumably expected to pass a valid index.
726 RFCMessage::bodyPart(DtMailEnv & error, const int slot)
730 MutexLock lock_scope(_obj_mutex);
732 return(_bp_cache[slot]->body);
// Linear search of the body part cache for the entry whose body pointer
// equals bp. The return statements are elided in this view; callers treat
// a negative result as "not found" (see getNextBodyPart).
736 RFCMessage::lookupByBody(DtMail::BodyPart * bp)
738 for (int slot = 0; slot < _bp_cache.length(); slot++) {
739 if (_bp_cache[slot]->body == bp) {
// Helper used by findMsgEnd to derive the last byte of a message from a
// position at or near its end. Most of the body is elided in this view;
// presumably it steps `back` over trailing CR/LF characters -- TODO
// confirm.
748 backcrlf(const char * pos)
750 const char * back = pos;
// Parses one message starting at _msg_start and ending no later than
// end_of_file:
//   1. finds the end of the headers (a newline followed by a blank or
//      whitespace-only line),
//   2. builds the RFCEnvelope from those header bytes,
//   3. sets _body_start, and
//   4. calls findMsgEnd() to set _msg_end.
// If no header terminator is found it sets DTME_NotMailBox and returns
// end_of_file + 1. The normal return is elided in this view; presumably
// it is the pointer obtained from findMsgEnd.
762 RFCMessage::parseMsg(DtMailEnv & error,
763 const char * end_of_file)
765 // We need to find the end of the envelope. This is signified by a
766 // single blank line. This should simply be "\n\n" in the message
767 // but RFC says parsers should be forgiving so we will tolerate
768 // "\n<any-white-space>\n"
771 for (hdr_end = _msg_start; hdr_end <= end_of_file; hdr_end++) {
772 if (*hdr_end == '\n') {
// NOTE(review): the loop condition below reads *blanks before testing
// blanks <= end_of_file, so one byte past end_of_file can be read when
// the newline is the last byte. Swapping the two operands would put the
// bounds test first.
774 for (const char * blanks = hdr_end + 1;
775 *blanks != '\n' && blanks <= end_of_file; blanks++) {
776 if (!isspace((unsigned char)*blanks)) {
783 // Found the end of the headers.
789 if (hdr_end > end_of_file) { // Ran off the end.
790 error.setError(DTME_NotMailBox);
791 return(end_of_file + 1);
794 // We need to parse the headers now, because they will give us the
795 // content length, type, and a message id.
797 _envelope = new RFCEnvelope(error, this, _msg_start, hdr_end - _msg_start + 1);
799 // Oops! We need to find the next "From " line if possible to at least
800 // let the rest of the parsing proceed.
// NOTE(review): the condition guarding this recovery scan is elided;
// presumably it runs only when envelope construction reported an error.
802 const char *next_from;
803 for (next_from = hdr_end + 1;
804 next_from <= (end_of_file - 6); next_from++) {
805 if (strncmp(next_from, "\nFrom ", 6) == 0) {
809 const char * new_end;
810 if (next_from > (end_of_file - 6)) {
811 new_end = end_of_file + 1;
814 new_end = next_from + 1;
819 // The hdr_end now points at the last newline of the envelope.
820 // We want the body to start after the next newline, but we must
821 // be prepared to stop if the end of the file is seen before the
822 // body start is found, which can happen if the last message in
823 // the inbox has no contents at all.
825 for (_body_start = hdr_end + 1; _body_start <= end_of_file && *_body_start != '\n'; _body_start++) {
829 if (_body_start > end_of_file) // past the end??
830 _body_start = end_of_file; // yes: body starts on last byte
832 // Need to know where this message ends.
834 const char *messageEnd = findMsgEnd(error, end_of_file);
836 // Need to guard against the case where the current body begins
837 // past the "end" of the current message - this can happen if a
838 // message consists of [this_msg_headers\n][\n][next_msg_headers]
839 // where there is a zero length (no) body.
841 if (_body_start > _msg_end) // Does body start after last byte in message?
842 _body_start = _msg_end; // Yes: force body to start at last byte of message
// Locates the end of the current message and stores it in _msg_end.
// Strategies are tried in order: the Content-Length header, the X-Lines
// header, and finally a scan for "\nFrom " at the start of a line.
// When the end had to be found by scanning, a Content-Length header is
// written back to the envelope.
847 // returns -> last byte of current message
// NOTE(review): the line above disagrees with the description inside the
// function and with the visible returns (next_msg + 1), which point at
// the start of the next envelope.
850 RFCMessage::findMsgEnd(DtMailEnv & error, const char * eof)
852 // This function sets "_msg_end" to ( "_body_start" - 1 + length_of_message ).
853 // This function returns a pointer to the beginning of the next envelope "From " or (eof+1).
854 // See if we have a content length. If so, then will try it first.
860 _msg_end = _body_start;
862 DtMailValueSeq value;
863 _envelope->getHeader(error, "content-length", DTM_FALSE, value);
864 if (error.isNotSet()) {
865 content_length = atol(*(value[0]));
867 // Look forward content_length amount and see if we are at
868 // the end (but not beyond) of file, or have a "\nFrom ".
869 // We must be careful here when using content_length as it *may*
870 // be a really large (bogus) number and the addition of that number
871 // to the start of the body *may* wrap around the top of the address
872 // space, thus resulting in a memory access violation.
// NOTE(review): forming a pointer outside the mapped message is undefined
// behaviour in C++, so a compiler is entitled to assume the wrap test
// below is never true. Comparing content_length with (eof - _body_start)
// before the addition would avoid relying on wrap-around.
// A negative Content-Length also makes next_msg < _body_start and is
// handled by the same branch. The replacement value eof + 31 is outside
// the fudge window tested further down, so such messages fall through to
// the "From " scan.
874 const char * next_msg = _body_start + content_length;
875 const int contentLengthFudgeFactor = 30; // "From" header min length
877 if (next_msg < _body_start) // message wrapped address space?
878 next_msg = eof+contentLengthFudgeFactor+1; // yes: downsize it
881 // A special case for content-length = 0. Some folders only
882 // have one new line in this case which will confuse us
883 // unless we do something special.
885 if (content_length == 0) {
// NOTE(review): this test only requires next_msg <= eof, so strncmp may
// examine bytes beyond eof when fewer than 6 remain. The general case
// below bounds it with eof - 6.
887 if (next_msg <= eof && strncmp(next_msg, "\nFrom ", 6) == 0) {
889 return(next_msg + 1);
893 if (next_msg <= (eof - 6) && strncmp(next_msg, "\nFrom ", 6) == 0) {
895 _msg_end = backcrlf(next_msg);
896 return(next_msg + 1);
899 // Content length extends beyond the eof!
900 // Here it becomes tricky (see bug 1204026). We do not want to
901 // cause the partial receipt of a very large mail message to
902 // cause other valid mail messages to be "gobbled up" in this
903 // message; therefore, we apply a small fudge factor rule here.
904 // If the current end of message is within X bytes (about the
905 // size of a reasonable "From" header) then we take the current
906 // end to be for this message; otherwise, err on the side of
907 // bursting this message into multiple smaller ones by doing
908 // the "From" scan dance.
910 if ( (next_msg > eof)
911 && (next_msg < eof+contentLengthFudgeFactor) ) {
916 // We need to deal with extraneous white space at the end of
919 if (next_msg <= eof) {
921 for (white = next_msg; white <= eof && isspace((unsigned char)*white);
927 _msg_end = backcrlf(next_msg);
931 // See if we are at a "\nFrom ". If so, consider it a
932 // case of off by one and just accept it. This will save
933 // us from scanning the entire message when the content-length
934 // simply didn't get trailing white space right.
// NOTE(review): no bound against eof is visible for this strncmp -- confirm.
936 if (strncmp(white - 1, "\nFrom ", 6) == 0) {
937 _msg_end = white - 2;
946 // If the message was sent with X-Lines, we can use it to help
947 // us find the end of the message.
950 _envelope->getHeader(error, "x-lines", DTM_FALSE, value);
951 if (error.isNotSet()) {
952 int xlines = (int) atol(*(value[0]));
// Walk forward from the body start counting newlines until the X-Lines
// count is reached or eof is passed.
955 for (_msg_end = _body_start; _msg_end <= eof; _msg_end++) {
956 if (*_msg_end == '\n') {
958 if (lcnt == xlines) {
// NOTE(review): _msg_end may be at or beyond eof here (for example after
// the X-Lines loop runs to completion), and no bound against eof is
// visible for this strncmp -- confirm.
965 if (strncmp(_msg_end, "\nFrom ", 6) != 0) {
966 // Well, looks like we have to do it the costly way. Scan the
967 // folder until we hit the end of file, or we hit a "From " at
968 // the start of a line.
970 for (_msg_end = _body_start - 1; _msg_end <= (eof - 6); _msg_end++) {
971 if (strncmp(_msg_end, "\nFrom ", 6) == 0) {
977 // The end is either the eof, or the from line.
979 const char * real_end = _msg_end + 1;
980 if (_msg_end > (eof - 6)) {
985 // Again, protect against NULL messages with 1 blank line
986 // before next message. Size was < 0 and crashed in "memcpy".
987 if ( _msg_end >= _body_start )
988 _msg_end = backcrlf(_msg_end);
991 // Let's put a content length on this thing so we won't have to go
992 // through this silliness again!
994 content_length = _msg_end - _body_start + 1;
995 content_length = content_length < 0 ? 0 : content_length;
// NOTE(review): content_length is compared with 0 above, which implies a
// signed type, yet it is formatted with "%lu". Its declaration is elided
// in this view; "%ld" would match a long.
998 sprintf(buf, "%lu", content_length);
999 _envelope->setHeader(error, "Content-Length", DTM_TRUE, buf);
1004 // This is a list of all of the Sun V3 specific headers
// parseBodies() walks this array until it reaches a null entry, so the
// list must end with a NULL terminator (the terminator and some entries
// are elided in this view).
1006 static const char *SCANLIST[] = {
1008 "x-sun-content-length",
1009 "x-sun-content-lines",
1010 "x-sun-data-description",
1014 "x-sun-encoding-info",
1015 "x-sun-reference-file",
// Chooses the body parser for this message from its envelope headers:
//   - Mime-Version present                       -> parseMIMEBodies
//   - no Mime-Version and no Content-Type         -> parseV3Bodies
//   - Content-Type containing '/' or ';'          -> parseMIMEBodies
//   - Content-Type exactly "text": parseV3Bodies if any header from
//     SCANLIST is present, otherwise parseMIMEBodies
//   - any other Content-Type value                -> parseV3Bodies
1020 RFCMessage::parseBodies(DtMailEnv & error)
1024 // First we need to determine which type of message bodies we
1025 // have. There are actually 3 possibilities: single body part,
1026 // Sun V3, or MIME. The main difference in the last 2 is the
1027 // headers and delimiters used.
1029 DtMailValueSeq value;
1030 _envelope->getHeader(error, "Mime-Version", DTM_FALSE, value);
1031 if (error.isNotSet()) {
1032 parseMIMEBodies(error);
1037 // We need to try a common MIME header to see if maybe the
1038 // message is just poorly formed. We will examine the content
1039 // type to see if smells like a MIME type.
1042 _envelope->getHeader(error, "Content-Type", DTM_FALSE, value);
1043 if (error.isSet()) {
1044 // No content-type or Mime-Version header: treat as V3
1047 parseV3Bodies(error);
1050 const char * content_type = *(value[0]);
1051 // Handle "Content-Type: text" problem with /usr/lib/mail.local:
1052 // Apparently /usr/lib/mail.local will insert a "Content-Type: text"
1053 // header into received mail if no other Content-Type field is present.
1054 // The format of this header is consistent with RFC 1049 (March 1988)
1055 // which has since been superseded by the MIME RFC 1521.
1057 if (strpbrk(content_type, "/;")) {
1058 // sub-types identify this as MIME formatted e-mail
1060 parseMIMEBodies(error);
1062 else if (strcasecmp(content_type, "text")==0) {
1063 // Problem time - no Mime-Version is present yet there is a
1064 // content-type: text which means it could either be a Sun V3 OR
1065 // a very poorly formatted MIME message: see if any V3 message
1066 // headers are found and if so treat as V3 else treat as MIME
// Probe the envelope for each V3-specific header in turn; the loop stops
// at the null entry that terminates SCANLIST.
1069 for (cp = SCANLIST; *cp; cp++) {
1070 _envelope->getHeader(error, *cp, DTM_FALSE, value);
1071 if (error.isNotSet())
1076 // Found v3 header - treat as such
1078 parseV3Bodies(error);
1081 // No v3 headers found - treat as MIME
1083 parseMIMEBodies(error);
1087 parseV3Bodies(error);
// Dispatches MIME body parsing on the Content-Type header:
//   - header missing   -> parseMIMETextPlain
//   - "multipart/..."  -> parseMIMEMultipartSubtype
//   - "message/..."    -> parseMIMEMessageSubtype
//   - anything else    -> parseMIMETextPlain
1096 RFCMessage::parseMIMEBodies(DtMailEnv & error)
1100 // MIME messages have many different forms, all of which we have to
1101 // take into account when parsing the bodies. If there is *no* MIME
1102 // Content-Type field, then we treat the entire body of the message
1103 // as a single body part of Content-Type: text/plain.
1105 DtMailValueSeq value;
1106 _envelope->getHeader(error, "Content-Type", DTM_FALSE, value);
1107 if (error.isSet()) {
1108 parseMIMETextPlain(error);
1112 // We have a MIME Content-Type field - handle the field appropriately
1113 const char * content_type = *(value[0]);
// The offsets +9 and +7 skip "multipart" and "message" respectively, so
// the subtype string handed on begins at the '/' separator.
1114 if (strncasecmp(content_type, "multipart/", 10) == 0)
1115 parseMIMEMultipartSubtype(error, content_type+9);
1116 else if (strncasecmp(content_type, "message/", 8) == 0)
1117 parseMIMEMessageSubtype(error, content_type+7);
1118 else // Unknown MIME Content-Type field - handle as text/plain.
1119 parseMIMETextPlain(error);
1124 // parseMIMEMultipartSubtype -- parse a MIME Multipart content-type
1127 // subtype -- -> Multipart subtype string (beginning with "/")
1129 // Handle the parsing of the MIME Multipart content-type message according
1130 // to the Multipart sub-type.
1134 // Call this function to handle the parsing of a MIME Multipart subtype.
1137 RFCMessage::parseMIMEMultipartSubtype(DtMailEnv & error,
1138 const char * subtype)
1140 // Have a multipart message to digest - determine type of multipart message
// The boundary string returned here is released with free() at the end of
// this function.
1142 const char * boundary = (const char *) extractBoundary(subtype);
1144 // odd--there is no boundary in the multipart/ specification?
1145 // This is specifically required as per RFC 1521:
1146 // The only mandatory parameter for the multipart Content-Type is
1147 // the boundary parameter.
1148 // Treat as one big unbounded message.
// NOTE(review): the test guarding this fallback and its exit are elided;
// presumably it runs only when no boundary was extracted and then returns
// before the dispatch below.
1150 BodyPartCache * bpc = new BodyPartCache;
1151 bpc->body_start = _body_start;
1152 bpc->body = new MIMEBodyPart(error, this, _body_start,
1153 _msg_end - _body_start + 1,
1154 (RFCEnvelope *)_envelope);
1156 _bp_cache.append(bpc);
// "/alternative" gets its own parser; "/mixed" and every unrecognized
// subtype are parsed as multipart/mixed.
1160 if (strncasecmp(subtype, "/alternative", 12)==0)
1161 parseMIMEMultipartAlternative(error, boundary);
1162 else if (strncasecmp(subtype, "/mixed", 6)==0)
1163 parseMIMEMultipartMixed(error, boundary);
1164 else // unknown types handled as Multipart/mixed
1165 parseMIMEMultipartMixed(error, boundary);
1167 assert(boundary != NULL);
1168 free((void *)boundary);
1172 // parseMIMEMessageExternalBody -- parse a MIME Message/External-body message
1176 // Process a MIME Message/External-body message
1180 // Call this function to handle a MIME Message/External-body message
// Unlike parseMIMETextPlain, the single body part created here starts at
// _msg_start rather than _body_start, so it also covers the headers.
1183 RFCMessage::parseMIMEMessageExternalBody(DtMailEnv & error)
1185 // Have a Message/External-Body message to digest - real dumb here,
1186 // include entire message including the headers as a single text/plain
1189 BodyPartCache * bpc = new BodyPartCache;
1190 bpc->body_start = _msg_start;
1191 bpc->body = new MIMEBodyPart(error, this, _msg_start,
1192 _msg_end - _msg_start + 1,
1193 (RFCEnvelope *)_envelope);
1195 _bp_cache.append(bpc);
1200 // parseMIMEMessageSubtype -- parse a MIME Message content-type
1203 // subtype -- -> Message subtype string (beginning with "/")
1205 // Handle the parsing of the MIME Message content-type message according
1206 // to the Message sub-type.
1210 // Call this function to handle the parsing of a MIME Message subtype.
1213 RFCMessage::parseMIMEMessageSubtype(DtMailEnv & error,
1214 const char * subtype)
1216 // Have a Message message to digest - determine type of Message message
// Only "/external-body" has a dedicated parser; every other message/*
// subtype is treated as a single text/plain body.
1219 if (strncasecmp(subtype, "/external-body", 14) == 0)
1220 parseMIMEMessageExternalBody(error);
1222 parseMIMETextPlain(error);
1226 // parseMIMETextPlain -- parse a MIME Text/Plain message
1230 // Construct single body part cache component encompassing the entire body
1231 // of the message as defined by _body_start for (_msg_end-_body_start)+1 bytes
1235 // Call this function to take the current message body and treat it as
1236 // a single message of type text/plain.
1239 RFCMessage::parseMIMETextPlain(DtMailEnv & error)
1243 // This is a single body part of text/plain.
// The new body part is given the message's own envelope and is appended
// to the body part cache.
1245 BodyPartCache * bpc = new BodyPartCache;
1246 bpc->body_start = _body_start;
1247 bpc->body = new MIMEBodyPart(error, this, _body_start,
1248 _msg_end - _body_start + 1,
1249 (RFCEnvelope *)_envelope);
1251 _bp_cache.append(bpc);
1256 // parseMIMEMultipartAlternative -- parse a MIME multipart/alternative message
1259 // boundary -- interpart boundary
1261 // updated message pointers, constructed body part cache, and
1262 // constructed alternative message cache
1266 // Given a multipart alternative MIME message, parse the message, creating
1267 // a body part cache with an entry containing each body part, and ???GMG???
1269 // Here is how a multipart alternative message will be parsed into memory,
1270 // where alternatives 1 and 2 can be displayed / processed on this system,
1271 // but alternative 3 can not.
1273 // +--------------------+ -> A
1275 // +--------------------+ -> B
1276 // | interpart boundary |
1277 // +--------------------+ -> C
1278 // | alternative 1 |
1279 // +--------------------+ -> D
1280 // | interpart boundary |
1281 // +--------------------+ -> E
1282 // | alternative 2 |
1283 // +--------------------+ -> F
1284 // | interpart boundary |
1285 // +--------------------+ -> G
1286 // | alternative 3 |
1287 // +--------------------+ -> H
1288 // _alternativeMultipart == DTM_TRUE
1289 // _alternativeMessage == DTM_FALSE
1290 // _alternativeValid == DTM_FALSE
1291 // _bp_cache = contents of alternative 2's bp_cache (because 1-2 valid, 3 not)
1292 // _alt_msg_cache = entries for alternative 1, alternative 2, alternative 3
1294 // +----------------------+ -> C
1296 // +----------------------+
1297 // | multipart boundary |
1298 // +----------------------+
1299 // | alternative 1 part a |
1300 // +----------------------+
1301 // | multipart boundary |
1302 // +----------------------+
1303 // | alternative 1 part b |
1304 // +----------------------+ -> D
1305 // _alternativeMultipart == DTM_FALSE
1306 // _alternativeMessage == DTM_TRUE
1307 // _alternativeValid == DTM_TRUE [this sample case can display this alt.]
1308 // _bp_cache = entries for part a and part b
1309 // _alt_msg_cache = empty
1311 // +----------------------+ -> E
1313 // +----------------------+
1314 // | multipart boundary |
1315 // +----------------------+
1316 // | alternative 2 part a |
1317 // +----------------------+
1318 // | multipart boundary |
1319 // +----------------------+
1320 // | alternative 2 part b |
1321 // +----------------------+ -> F
1322 // _alternativeMultipart == DTM_FALSE
1323 // _alternativeMessage == DTM_TRUE
1324 // _alternativeValid == DTM_TRUE [this sample case can display this alt.]
1325 // _bp_cache = entries for part a and part b
1326 // _alt_msg_cache = empty
1328 // +----------------------+ -> G
1330 // +----------------------+
1331 // | multipart boundary |
1332 // +----------------------+
1333 // | alternative 3 part a |
1334 // +----------------------+
1335 // | multipart boundary |
1336 // +----------------------+
1337 // | alternative 3 part b |
1338 // +----------------------+ -> H
1339 // _alternativeMultipart == DTM_FALSE
1340 // _alternativeMessage == DTM_TRUE
1341 // _alternativeValid == DTM_FALSE [this sample case can NOT display this alt.]
1342 // _bp_cache = entries for part a and part b
1343 // _alt_msg_cache = empty
// Parses a multipart/alternative body: one AlternativeMessageCache entry
// (with its own nested RFCMessage) is built per alternative, and the body
// part entries of one alternative are then copied into this message's
// _bp_cache.
//   error    -- passed to the MIMEBodyPart and RFCMessage constructors
//   boundary -- interpart boundary text; must not be NULL
1346 RFCMessage::parseMIMEMultipartAlternative(DtMailEnv & error, const char * boundary)
// Preconditions: this message is not itself an alternative, and a boundary
// string was supplied.
1348 assert(_alternativeMessage == DTM_FALSE);
1349 assert(boundary != NULL);
1351 // Chew through anything that appears before the first boundary.
1353 const char * body = _body_start;
1354 const char * body_end = _msg_end;
1356 int bndry_len = strlen(boundary);
// Scan for a position where a dash at body[1] is followed by the boundary
// text at body[2].
// NOTE(review): these comparisons look ahead of `body`; whether they can
// read past _msg_end depends on how the buffer is terminated -- confirm.
1358 for (; body <= _msg_end; body++) {
1360 *(body + 1) == '-' &&
1361 strncmp(body + 2, boundary, bndry_len) == 0) {
// Either the scan ran off the end of the message, or the character right
// after the matched boundary text is '-' (presumably the first delimiter
// found is already the closing one).
1366 if (body > _msg_end ||
1367 *(body + bndry_len + 2) == '-') {
1368 // No boundaries. Treat as one big message.
1370 BodyPartCache * bpc = new BodyPartCache;
1371 bpc->body_start = _body_start;
1372 bpc->body = new MIMEBodyPart(error, this, _body_start,
1373 _msg_end - _body_start + 1,
1374 (RFCEnvelope *)_envelope);
1376 _bp_cache.append(bpc);
1380 // We are sitting at the beginning of the first boundary
1381 // Construct an alternative message cache for the various alternative bodies
1382 // On input to this loop,
1383 // body -> first byte of body part including boundary
1384 // body_end -> end of message (_msg_end)
1385 // On each iteration through the loop,
1386 // - MIMEBodyPart is called with a body boundary of [body]...[body_end]
1387 // - MIMEBodyPart alters body_end to be the last byte of the message,
1388 // containing the boundary *only if* it is the last body part
1391 AlternativeMessageCache * amc = new AlternativeMessageCache;
1392 amc->amc_body_start = body;
1393 amc->amc_body_end = body_end;
// amc_body_end is passed by address so the MIMEBodyPart constructor can
// update it; the updated value becomes the next scan position.
1394 amc->amc_body = new MIMEBodyPart(error, this, body, &amc->amc_body_end, boundary);
1395 body = amc->amc_body_end;
// If the part ran to one byte past the end of the message and the text
// just before that point is the boundary, move amc_body_end back by
// bndry_len+5 bytes.
// NOTE(review): presumably this trims the trailing closing delimiter so it
// is not handed to the nested RFCMessage -- confirm the +3 / +5 offsets
// against the delimiter layout.
1396 if ( (amc->amc_body_end == (body_end+1))
1397 && (strncmp(amc->amc_body_end-(bndry_len+3), boundary, bndry_len) == 0) ){
1398 amc->amc_body_end -= bndry_len+5;
// Parse the alternative's byte range as a message of its own and remember it.
1400 amc->amc_msg = new RFCMessage(error, amc->amc_body_start, amc->amc_body_end);
1401 _alt_msg_cache.append(amc);
1402 } while (body <= _msg_end);
1404 // Now we *should* go through the entire alternative message cache validating
1405 // each message to see if it can be displayed on this system. This is a task
1406 // left for a future exercise. For now, always use the last alternative.
1408 AlternativeMessageCache *amc = _alt_msg_cache[_alt_msg_cache.length()-1];
// Copy the chosen alternative's body part entries into this message's
// cache. Each new entry points at the same body object as the entry it was
// copied from, so the body parts are shared with amc->amc_msg.
1409 int slotMax = amc->amc_msg->_bp_cache.length();
1410 for (int slot = 0; slot < slotMax; slot++) {
1411 BodyPartCache * p_bpc = new BodyPartCache;
1412 BodyPartCache * c_bpc = amc->amc_msg->_bp_cache[slot];
1413 p_bpc->body = c_bpc->body;
1414 p_bpc->body_start = c_bpc->body_start;
1415 _bp_cache.append(p_bpc);
1418 _alternativeMultipart = DTM_TRUE; // main msg of multipart alternative
1421 // parseMIMEMultipartMixed -- parse a MIME multipart/mixed message
1424 // boundary -- bodypart boundary
1426 // updated message pointers and constructed body part cache
1430 // Given a multipart mixed MIME message, parse the message, creating a body
1431 // part cache with an entry containing each body part.
// Parses a multipart/mixed body, appending one BodyPartCache entry to
// _bp_cache for each body part found between boundaries.
//   error    -- passed to the MIMEBodyPart constructor
//   boundary -- bodypart boundary text; must not be NULL
1434 RFCMessage::parseMIMEMultipartMixed(DtMailEnv & error, const char * boundary)
1436 assert(boundary != NULL);
1438 // Chew through anything that appears before the first boundary.
1440 const char * body = _body_start;
1441 const char * body_end = _msg_end;
1443 int bndry_len = strlen(boundary);
// Scan for a position where a dash at body[1] is followed by the boundary
// text at body[2].
// NOTE(review): these comparisons look ahead of `body`; whether they can
// read past _msg_end depends on how the buffer is terminated -- confirm.
1445 for (; body <= _msg_end; body++) {
1447 *(body + 1) == '-' &&
1448 strncmp(body + 2, boundary, bndry_len) == 0) {
// Either the scan ran off the end of the message, or the character right
// after the matched boundary text is '-' (presumably the first delimiter
// found is already the closing one).
1453 if (body > _msg_end ||
1454 *(body + bndry_len + 2) == '-') {
1455 // No boundaries. Treat as one big message.
1457 BodyPartCache * bpc = new BodyPartCache;
1458 bpc->body_start = _body_start;
1459 bpc->body = new MIMEBodyPart(error, this, _body_start,
1460 _msg_end - _body_start + 1,
1461 (RFCEnvelope *)_envelope);
1463 _bp_cache.append(bpc);
1467 // We are sitting at the beginning of the first boundary
1468 // Construct a body part cache from the various body parts
1469 // On input to this loop,
1470 // body -> first byte of body part (including boundary)
1471 // body_end -> end of message (_msg_end)
1472 // On each iteration through the loop,
1473 // - MIMEBodyPart is called with a body boundary of [body]...[body_end]
1474 // - MIMEBodyPart alters body_end to be the last byte of the message,
1475 // containing the boundary *only if* it is the last body part
1478 BodyPartCache * bpc = new BodyPartCache;
1479 bpc->body_start = body;
// body_end is passed by address so the MIMEBodyPart constructor can
// update it.
1480 bpc->body = new MIMEBodyPart(error, this, body, &body_end, boundary);
1481 _bp_cache.append(bpc);
// Put the end marker back at the end of the message before the next part
// is parsed.
1484 body_end = _msg_end;
1486 } while (body <= _msg_end);
// Builds the body part cache for a non-MIME (plain RFC822 or Sun V3)
// message. Depending on the Content-Type header the body becomes either a
// single V3BodyPart or a sequence of V3BodyParts found by scanning for the
// ten-dash separator lines.
//   error -- receives the result of the header lookup and is passed to the
//            V3BodyPart constructors
1491 RFCMessage::parseV3Bodies(DtMailEnv & error)
1495 // We have 3 choices here. We may have no content-type field,
1496 // in which case it is a simple RFC822 message. We handle those
1497 // in the V3 body code because this is really the legacy branch.
1499 // We can have a content-type = text which is a single body part
1500 // of either 7 bit text, or 8 bit unencoded text.
1502 // Finally we could have a Sun V3 multipart document.
1504 DtMailValueSeq value;
1505 _envelope->getHeader(error, "Content-Type", DTM_FALSE, value);
// The header lookup reported an error -- presumably the "no content-type
// field" case described above.
1506 if (error.isSet()) {
1507 // Pretty simple. Pass the entire body and the envelope to
1508 // the V3 body constructor.
1511 BodyPartCache * bpc = new BodyPartCache;
1512 bpc->body_start = _body_start;
1513 bpc->body = new V3BodyPart(error, this, _body_start,
1514 _msg_end - _body_start + 1,
1515 (RFCEnvelope *)_envelope);
1517 _bp_cache.append(bpc);
// Only the first value returned for the header is examined.
1522 const char * content_type = *(value[0]);
1524 // If the type is text, or, if it isn't an attachment type we
1525 // understand treat the body as a single part.
// NOTE(review): the first test is redundant -- a string equal to "text" is
// necessarily not equal to "x-sun-attachment", so the condition reduces to
// the second test alone.
1527 if (strcasecmp(content_type, "text") == 0 ||
1528 strcasecmp(content_type, "x-sun-attachment") != 0) {
1529 BodyPartCache * bpc = new BodyPartCache;
1530 bpc->body_start = _body_start;
1531 bpc->body = new V3BodyPart(error, this, _body_start,
1532 _msg_end - _body_start + 1,
1533 (RFCEnvelope *)_envelope);
1535 _bp_cache.append(bpc);
1538 // We need to scan for each of the message boundaries and
1539 // let the body part object parse the important stuff.
1541 // V3 bodies start with a sequence of 10 dashes. After that,
1542 // there *should* be a newline. After such a sequence,
1543 // the body part constructor will give us the body end.
// The scan begins one byte before _body_start, presumably so that a
// separator on the very first body line is still preceded by a newline.
// NOTE(review): this reads the byte at _body_start - 1 and looks up to 12
// bytes ahead of `body` -- confirm both always stay inside the buffer.
// A separator is a newline, ten dashes, and then either LF or CR LF.
1546 for (body = _body_start - 1; body <= _msg_end; body++) {
1547 if (*body == '\n' &&
1548 strncmp(body + 1, "----------", 10) == 0 &&
1549 (*(body + 11) == '\n' ||
1550 *(body + 11) == '\r' && *(body + 12) == '\n')) {
// The scan reached the end of the message without finding a separator.
1554 if (body > _msg_end) {
1555 // Well, we have some kind of inconsistency here. Let's
1556 // treat it as one big body part so we can display something.
1558 BodyPartCache * bpc = new BodyPartCache;
1559 bpc->body_start = _body_start;
1560 bpc->body = new V3BodyPart(error, this, _body_start,
1561 _msg_end - _body_start + 1,
1562 (RFCEnvelope *)_envelope);
1564 _bp_cache.append(bpc);
1570 const char * body_end = _msg_end;
1573 BodyPartCache * bpc = new BodyPartCache;
1574 bpc->body_start = body;
// body_end is passed by address so the V3BodyPart constructor can report
// where the part ends.
1575 bpc->body = new V3BodyPart(error, this, body, &body_end);
1576 _bp_cache.append(bpc);
// Put the end marker back at the end of the message before the next part
// is parsed.
1578 body_end = _msg_end;
1579 } while (body <= _msg_end);
// Returns the number of bytes from _body_start through _msg_end inclusive.
// The DtMailEnv argument is unnamed and is not used.
1586 RFCMessage::sizeMIMEBodies(DtMailEnv &)
1588 // We will use _msg_end - _body_start because this is
1589 // both the correct body length, and the one we updated.
1591 return(_msg_end - _body_start + 1);
// Returns the number of bytes from _body_start through _msg_end inclusive.
// The DtMailEnv argument is unnamed and is not used.
1595 RFCMessage::sizeV3Bodies(DtMailEnv &)
1597 // We will use _msg_end - _body_start because this is
1598 // both the correct body length, and the one we updated.
1600 return(_msg_end - _body_start + 1);
// Produces a boundary string as a malloc'ed, NUL-terminated buffer.
//   content_type -- Content-Type header text to search for a "boundary="
//                   parameter, or NULL to have a fresh boundary generated
// NOTE(review): the result comes from malloc, so presumably the caller
// releases it with free() -- confirm against the callers.
1604 RFCMessage::extractBoundary(const char * content_type)
1606 const char * boundary;
// No Content-Type text: synthesize a boundary from the new buffer's
// address, the current time, and the address of the local pointer.
// NOTE(review): the malloc result is not checked before use, and sprintf
// is not bounded by the 78-byte buffer size.
1608 if (!content_type) {
1609 char * new_bdry = (char *)malloc(78);
1610 sprintf(new_bdry, "%p-%08lx-%p", new_bdry, (long)time(NULL), &new_bdry);
1613 // We will need the boundary to find the message boundaries.
// Case-insensitive search for the "boundary=" parameter name.
1615 for (boundary = content_type; *boundary; boundary++) {
1616 if (strncasecmp(boundary, "boundary=", 9) == 0) {
1624 // Get past uninteresting cruft.
1628 if (*boundary == '"') {
1633 // Find the end of the boundary. This will be either the end of
1634 // the line, a quote, or a semi-colon.
1637 for (b_end = boundary; *b_end; b_end++) {
1639 if (*b_end == '"') {
1644 if (*b_end == ';' || *b_end == '\r' || *b_end == '\n') {
// len includes room for the terminating NUL; len - 1 characters are copied
// and the terminator is then written explicitly.
// NOTE(review): the malloc result is not checked before use.
1650 int len = b_end - boundary + 1;
1651 char * result = (char *)malloc(len);
1652 strncpy(result, boundary, len - 1);
1653 result[len - 1] = 0;
1659 RFCMessage::hasHeaders(const char * buf, const unsigned long len)
1661 const char * start = buf;
1663 if (len > 5 && strncmp(buf, "From ", 5) == 0) {
1664 // Get past this line: it is the Unix "From " line, which doesn't count as
1665 // a header for our purposes.
1667 while (start < (buf + len) && *start != '\n') {
1673 if (start >= (buf + len)) {
1678 // At this point we should have something that looks like a header.
1679 // This will be a string with no spaces, terminated with a colon,
1680 // followed by some text.
1683 for (colon = start; colon < (buf + len) && *colon != ':'; colon++) {
1687 if (colon >= (buf + len) || *colon != ':') {
1692 // Once a field has been unfolded, it may be viewed as being com-
1693 // posed of a field-name followed by a colon (":"), followed by a
1694 // field-body, and terminated by a carriage-return/line-feed.
1695 // The field-name must be composed of printable ASCII characters
1696 // (i.e., characters that have values between 33. and 126.,
1697 // decimal, except colon).
1699 // We should only find characters between 33 and 126 from the start
1700 // to the colon. Any exception means this is not a valid field-name,
1701 // and therefore, not a valid RFC822 header.
1703 for (const char * check = start; check < colon; check++) {
1704 if (*check < 33 || *check > 126) {
1709 // We passed the criteria, so this must be a header.