/*
 * MIME mail decoding.
 *
 * This module contains decoding routines for converting
 * quoted-printable data into pure 8-bit data, in MIME
 * formatted messages.
 *
 * By Henrik Storner
 *
 * Configuration file support for fetchmail 4.3.8 by
 * Frank Damgaard
 *
 * For license terms, see the file COPYING in this directory.
 */

#include "config.h"
/*
 * NOTE(review): the four system header names were missing from this copy
 * of the file.  <string.h> and <ctype.h> are required by the code below;
 * <stdlib.h> and <stdio.h> are assumed - confirm against the upstream file.
 */
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include "fetchmail.h"
#include "i18n.h"

/*
 * Convert one hexadecimal digit (upper or lower case) to its value.
 *
 * Returns 0..15 for a valid digit, or 16 to flag an invalid character.
 */
static unsigned char unhex(unsigned char c)
{
    if ((c >= '0') && (c <= '9'))
        return (c - '0');
    else if ((c >= 'A') && (c <= 'F'))
        return (c - 'A' + 10);
    else if ((c >= 'a') && (c <= 'f'))
        return (c - 'a' + 10);
    else
        return 16;      /* invalid hex character */
}

/*
 * Combine two hexadecimal digits into one byte.
 *
 * c1, c2: the high and low hex digit characters.
 * c_out : receives the decoded byte; untouched on failure.
 *
 * Returns 0 on success, 1 if either character is not a hex digit.
 */
static int qp_char(unsigned char c1, unsigned char c2, char *c_out)
{
    c1 = unhex(c1);
    c2 = unhex(c2);

    if ((c1 > 15) || (c2 > 15))
        return 1;

    *c_out = (char)(16 * c1 + c2);
    return 0;
}


/*
 * Routines to decode MIME QP-encoded headers, as per RFC 2047.
 */

/* States of the decoding state machine */
#define S_COPY_PLAIN        0   /* Just copy, but watch for the QP flag */
#define S_SKIP_MIMEINIT     1   /* Get the encoding, and skip header */
#define S_COPY_MIME         2   /* Decode a sequence of coded characters */

static const char MIMEHDR_INIT[] = "=?";    /* Start of coded sequence */
static const char MIMEHDR_END[]  = "?=";    /* End of coded sequence */

/*
 * Decode, in place, a NUL-terminated buffer containing data encoded
 * according to RFC 2047.
 *
 * hdr: the header text; it is overwritten with the decoded text and
 *      re-terminated.  If it contains no "=?" it is left untouched.
 */
void UnMimeHeader(char *hdr)
{
    /* Decode a buffer containing data encoded according to RFC
     * 2047. This only handles content-transfer-encoding; conversion
     * between character sets is not implemented.  In other words: We
     * assume the charsets used can be displayed by your mail program
     * without problems.
     */

    /* Note: Decoding is done "in-situ", i.e. without using an
     * additional buffer for temp. storage. This is possible, since the
     * decoded string will always be shorter than the encoded string,
     * due to the encoding scheme.
     */

    int   state = S_COPY_PLAIN;
    char *p_in, *p_out, *p;
    char  enc = '\0';           /* initialization pacifies -Wall */
    size_t len;

    /* Speed up in case this is not a MIME-encoded header */
    p = strstr(hdr, MIMEHDR_INIT);
    if (p == NULL)
        return;                 /* No MIME header */

    /* Loop through the buffer.
     *    p_in : Next char to be processed.
     *    p_out: Where to put the next processed char
     *    enc  : Encoding used (usually, 'q' = quoted-printable)
     */
    for (p_out = p_in = hdr; (*p_in); ) {
        switch (state) {
        case S_COPY_PLAIN:
            p = strstr(p_in, MIMEHDR_INIT);
            if (p == NULL) {
                /*
                 * No more coded data in buffer,
                 * just move remainder into place.
                 */
                len = strlen(p_in);     /* How much left */
                memmove(p_out, p_in, len);
                p_in += len;
                p_out += len;
            }
            else {
                /* MIME header init found at location p */
                if (p > p_in) {
                    /* There are some uncoded chars at the beginning. */
                    len = (size_t)(p - p_in);
                    memmove(p_out, p_in, len);
                    p_out += len;
                }
                p_in = (p + 2);
                state = S_SKIP_MIMEINIT;
            }
            break;

        case S_SKIP_MIMEINIT:
            /* Mime type definition: "charset?encoding?" */
            p = strchr(p_in, '?');
            /*
             * p_in .. (p-1) holds the charset;
             * *(p+1) is the transfer encoding, *(p+2) must be a '?'.
             * Check *(p+1) for the terminator first so that *(p+2) is
             * never read beyond the end of the buffer.
             *
             * NOTE(review): on invalid data the "=?" that led here has
             * already been skipped and is not copied to the output.
             */
            if ((p != NULL) && (p[1] != '\0') && (p[2] == '?')) {
                enc = (char)tolower((unsigned char)p[1]);
                p_in = p + 3;
                state = S_COPY_MIME;
            }
            else
                state = S_COPY_PLAIN;   /* Invalid data */
            break;

        case S_COPY_MIME:
            p = strstr(p_in, MIMEHDR_END);  /* Find end of coded data */
            if (p == NULL)
                p = p_in + strlen(p_in);
            for (; (p_in < p); ) {
                /* Decode all encoded data */
                if (enc == 'q') {
                    if (*p_in == '=') {
                        /*
                         * Decode one char qp-coded at (p_in+1) and (p_in+2).
                         * Only look at (p_in+2) when (p_in+1) is not the
                         * terminator, to stay inside the buffer.
                         */
                        if ((p_in[1] != '\0')
                            && (qp_char(p_in[1], p_in[2], p_out) == 0))
                            p_in += 3;
                        else {
                            /* Invalid QP data - pass through unchanged. */
                            *p_out = *p_in;
                            p_in++;
                        }
                    }
                    else if (*p_in == '_') {
                        /*
                         * RFC 2047: '_' inside encoded word represents 0x20.
                         * NOT a space - always the value 0x20.
                         */
                        *p_out = 0x20;
                        p_in++;
                    }
                    else {
                        /* Copy unchanged */
                        *p_out = *p_in;
                        p_in++;
                    }
                    p_out++;
                }
                else if (enc == 'b') {
                    /* Decode base64 encoded data */
                    char delimsave;
                    int decoded_count;

                    /*
                     * Temporarily mark the end of the coded data with a
                     * '\r' (presumably where from64tobits() stops - confirm
                     * against its definition), then restore the byte.
                     */
                    delimsave = *p;
                    *p = '\r';
                    decoded_count = from64tobits(p_out, p_in, 0);
                    *p = delimsave;
                    if (decoded_count > 0)
                        p_out += decoded_count;
                    p_in = p;
                }
                else {
                    /* Copy unchanged */
                    *p_out = *p_in;
                    p_in++;
                    p_out++;
                }
            }
            if (*p_in)
                p_in += 2;      /* Skip the MIMEHDR_END delimiter */

            /*
             * We've completed decoding one encoded sequence. But another
             * may follow immediately, in which case whitespace before the
             * new MIMEHDR_INIT delimiter must be discarded.
             * See if that is the case
             */
            p = strstr(p_in, MIMEHDR_INIT);
            state = S_COPY_PLAIN;
            if (p != NULL) {
                /*
                 * There is more MIME data later on. Is there
                 * whitespace only before the delimiter?
                 */
                char *q;
                int  wsp_only = 1;

                for (q = p_in; (wsp_only && (q < p)); q++)
                    wsp_only = isspace((unsigned char)*q);

                if (wsp_only) {
                    /*
                     * Whitespace-only before the MIME delimiter. OK,
                     * just advance p_in to past the new MIMEHDR_INIT,
                     * and prepare to process the new MIME charset/encoding
                     * header.
                     */
                    p_in = p + sizeof(MIMEHDR_INIT) - 1;
                    state = S_SKIP_MIMEINIT;
                }
            }
            break;
        }
    }

    *p_out = '\0';
}


/*
 * Routines for decoding body-parts of a message.
 *
 * Since the "fetch" part of fetchmail gets a message body
 * one line at a time, we need to maintain some state variables
 * across multiple invocations of the UnMimeBodyline() routine.
 * The driver routine should call MimeBodyType() when all
 * headers have been received, and then UnMimeBodyline() for
 * every line in the message body.
 *
 */
#define S_BODY_DATA 0
#define S_BODY_HDR  1

/*
 * Flag indicating if we are currently processing
 * the headers or the body of a (multipart) message.
 */
static int  BodyState = S_BODY_DATA;

/*
 * Flag indicating if we are in the process of decoding
 * a quoted-printable body part.
 */
static int  CurrEncodingIsQP = 0;
static int  CurrTypeNeedsDecode = 0;

/*
 * Delimiter for multipart messages. 
RFC 2046 states that this must * NEVER be longer than 70 characters. Add 3 for the two hyphens * at the beginning, and a terminating null. */ #define MAX_DELIM_LEN 70 static char MultipartDelimiter[MAX_DELIM_LEN+3]; /* This string replaces the "Content-Transfer-Encoding: quoted-printable" * string in all headers, including those in body-parts. The replacement * must be no longer than the original string. */ static const char ENC8BIT[] = "Content-Transfer-Encoding: 8bit"; static void SetEncoding8bit(char *XferEncOfs) { char *p; if (XferEncOfs != NULL) { memcpy(XferEncOfs, ENC8BIT, sizeof(ENC8BIT) - 1); /* If anything left, in this header, replace with whitespace */ for (p=XferEncOfs+sizeof(ENC8BIT)-1; ((unsigned char)*p >= ' '); p++) *p=' '; } } static char *GetBoundary(char *CntType) { char *p1, *p2; int flag; /* Find the "boundary" delimiter. It must be preceded with a ';' * and optionally some whitespace. */ p1 = CntType; do { p2 = strchr(p1, ';'); if (p2) for (p2++; isspace((unsigned char)*p2); p2++); p1 = p2; } while ((p1) && (strncasecmp(p1, "boundary", 8) != 0)); if (p1 == NULL) /* No boundary delimiter */ return NULL; /* Skip "boundary", whitespace and '='; check that we do have a '=' */ for (p1+=8, flag=0; (isspace((unsigned char)*p1) || (*p1 == '=')); p1++) flag |= (*p1 == '='); if (!flag) return NULL; /* Find end of boundary delimiter string */ if (*p1 == '\"') { /* The delimiter is inside quotes */ p1++; p2 = strchr(p1, '\"'); if (p2 == NULL) return NULL; /* No closing '"' !?! */ } else { /* There might be more text after the "boundary" string. */ p2 = strchr(p1, ';'); /* Safe - delimiter with ';' must be in quotes */ } /* Zero-terminate the boundary string */ if (p2 != NULL) *p2 = '\0'; return (p1 && strlen(p1)) ? p1 : NULL; } static int CheckContentType(char *CntType) { /* * Static array of Content-Type's for which we will do * quoted-printable decoding, if requested. 
* It is probably wise to do this only on known text-only types; * be really careful if you change this. */ static char *DecodedTypes[] = { "text/", /* Will match ALL content-type's starting with 'text/' */ "message/rfc822", NULL }; char *p = CntType; int i; /* If no Content-Type header, it isn't MIME - don't touch it */ if (CntType == NULL) return 0; /* Skip whitespace, if any */ for (; isspace((unsigned char)*p); p++) ; for (i=0; (DecodedTypes[i] && (strncasecmp(p, DecodedTypes[i], strlen(DecodedTypes[i])))); i++) ; return (DecodedTypes[i] != NULL); } /* * This routine does three things: * 1) It determines - based on the message headers - whether the * message body is a MIME message that may hold 8 bit data. * - A message that has a "quoted-printable" or "8bit" transfer * encoding is assumed to contain 8-bit data (when decoded). * - A multipart message is assumed to contain 8-bit data * when decoded (there might be quoted-printable body-parts). * - All other messages are assumed NOT to include 8-bit data. * 2) It determines the delimiter-string used in multi-part message * bodies. * 3) It sets the initial values of the CurrEncodingIsQP, * CurrTypeNeedsDecode, and BodyState variables, from the header * contents. * * The return value is a bitmask. */ int MimeBodyType(char *hdrs, int WantDecode) { char *NxtHdr = hdrs; char *XferEnc, *XferEncOfs, *CntType, *MimeVer, *p; int HdrsFound = 0; /* We only look for three headers */ int BodyType; /* Return value */ /* Setup for a standard (no MIME, no QP, 7-bit US-ASCII) message */ MultipartDelimiter[0] = '\0'; CurrEncodingIsQP = CurrTypeNeedsDecode = 0; BodyState = S_BODY_DATA; BodyType = 0; /* Just in case ... 
*/ if (hdrs == NULL) return BodyType; XferEnc = XferEncOfs = CntType = MimeVer = NULL; do { if (strncasecmp("Content-Transfer-Encoding:", NxtHdr, 26) == 0) { XferEncOfs = NxtHdr; p = nxtaddr(NxtHdr); if (p != NULL) { xfree(XferEnc); XferEnc = xstrdup(p); HdrsFound++; } } else if (strncasecmp("Content-Type:", NxtHdr, 13) == 0) { /* * This one is difficult. We cannot use the standard * nxtaddr() routine, since the boundary-delimiter is * (probably) enclosed in quotes - and thus appears * as an rfc822 comment, and nxtaddr() "eats" up any * spaces in the delimiter. So, we have to do this * by hand. */ /* Skip the "Content-Type:" part and whitespace after it */ for (NxtHdr += 13; ((*NxtHdr == ' ') || (*NxtHdr == '\t')); NxtHdr++); /* * Get the full value of the Content-Type header; * it might span multiple lines. So search for * a newline char, but ignore those that have a * have a TAB or space just after the NL (continued * lines). */ p = NxtHdr-1; do { p=strchr((p+1),'\n'); } while ( (p != NULL) && ((*(p+1) == '\t') || (*(p+1) == ' ')) ); if (p == NULL) p = NxtHdr + strlen(NxtHdr); xfree(CntType); CntType = (char *)xmalloc(p-NxtHdr+1); strlcpy(CntType, NxtHdr, p-NxtHdr+1); HdrsFound++; } else if (strncasecmp("MIME-Version:", NxtHdr, 13) == 0)
Note that this file is kept for historical reference.
It will no longer be updated or maintained.

-- Matthias Andree, 2010-02-06

SVN release - fetchmail release    - release manager
====================================================
              SORT_BY (release)
r5480       - 6.3.14   (2010-02-05) - MA
r5450       - 6.3.13   (2009-10-30) - MA
r5439       - 6.3.12   (2009-10-05) - MA
r5398       - 6.3.11   (2009-08-06) - MA
r5373       - 6.3.10   (2009-07-02) - MA
r5248       - 6.3.9    (2008-11-16) - MA
r5093       - 6.3.8    (2007-04-06) - MA
r5037       - 6.3.7    (2007-02-18) - MA
r5010       - 6.3.6    (2007-01-05) - MA
r4921       - 6.3.5    (2006-10-09) - MA
r4802       - 6.3.4    (2006-04-14) - MA
r4760       - 6.3.3    (2006-03-31) - MA
r4678       - 6.3.2    (2006-01-22) - MA
r4579       - 6.3.1    (2005-12-19) - MA
r4573       - 6.2.5.5  (2005-12-19) - MA
r4499       - 6.3.0    (2005-11-30) - MA
r4439       - 6.2.5.4  (2005-11-13) - MA
r4430       - 6.2.5.3  (2005-11-13) - MA
(tarball)   - 6.2.5.2  (2005-07-22) - MA
(tarball)   - 6.2.5.1  (2005-07-20) - MA
-- maintainer change --
r3860       - 6.2.5    (2003-10-15) - ESR
r3835       - 6.2.4    (2003-08-13) - ESR
r3826       - 6.2.3    (2003-07-17) - ESR
r3806       - 6.2.2    (2003-03-01) - ESR
r3783       - 6.2.1    (2003-01-14) - ESR
r3776       - 6.2.0    (2002-12-13) - ESR
r3773       - 6.1.3    (2002-11-28) - ESR
r3765       - 6.1.2    (2002-10-31) - ESR
r3755       - 6.1.1    (2002-10-18) - ESR
r3731       - 6.1.0    (2002-09-23) - ESR
r3726       - 6.0.0    (2002-09-18) - ESR
r3703       - 5.9.14   (2002-09-06) - ESR
r3655       - 5.9.13   (2002-06-23) - ESR
r3632       - 5.9.12   (2002-06-04) - ESR
r3608       - 5.9.11   (2002-04-02) - ESR
r3598       - 5.9.10   (2002-03-10) - ESR
r3593       - 5.9.9    (2002-03-09) - ESR
r3581       - 5.9.8    (2002-02-15) - ESR
r3576       - 5.9.7    (2002-02-02) - ESR
r3564       - 5.9.6    (2001-12-14) - ESR
r3548       - 5.9.5    (2001-11-08) - ESR
r3534       - 5.9.4    (2001-10-03) - ESR
r3514       - 5.9.3    (2001-09-30) - ESR
r3493       - 5.9.2    (2001-09-26) - ESR
r3481       - 5.9.1    (2001-09-25) - ESR
r3453       - 5.9.0    (2001-08-13) - ESR
r3447       - 5.8.17   (2001-08-08) - ESR
r3438       - 5.8.16   (2001-08-