/*
 * MIME mail decoding.
 *
 * This module contains decoding routines for converting
 * quoted-printable data into pure 8-bit data, in MIME
 * formatted messages.
 *
 * By Henrik Storner <storner@image.dk>
 *
 * Configuration file support for fetchmail 4.3.8 by
 * Frank Damgaard <frda@post3.tele.dk>
 *
 * For license terms, see the file COPYING in this directory.
 */

#include "config.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include "fetchmail.h"
#include "i18n.h"

/*
 * Convert one hexadecimal digit (upper or lower case) to its value.
 * Returns 0..15 for a valid digit, 16 for anything else.
 */
static unsigned char unhex(unsigned char c)
{
    if ((c >= '0') && (c <= '9'))
        return (c - '0');
    else if ((c >= 'A') && (c <= 'F'))
        return (c - 'A' + 10);
    else if ((c >= 'a') && (c <= 'f'))
        return (c - 'a' + 10);
    else
        return 16;        /* invalid hex character */
}

/*
 * Combine two hexadecimal digits (c1 = high nibble, c2 = low nibble)
 * into one byte, stored in *c_out.
 * Returns 0 on success; returns 1 and leaves *c_out untouched when
 * either character is not a hexadecimal digit.
 */
static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out)
{
    c1 = unhex(c1);
    c2 = unhex(c2);

    if ((c1 > 15) || (c2 > 15))
        return 1;

    *c_out = 16*c1+c2;
    return 0;
}


/*
 * Routines to decode MIME QP-encoded headers, as per RFC 2047.
 */

/* States of the decoding state machine */
#define S_COPY_PLAIN        0   /* Just copy, but watch for the QP flag */
#define S_SKIP_MIMEINIT     1   /* Get the encoding, and skip header */
#define S_COPY_MIME         2   /* Decode a sequence of coded characters */

static const char MIMEHDR_INIT[]  = "=?";   /* Start of coded sequence */
static const char MIMEHDR_END[]   = "?=";   /* End of coded sequence */

/*
 * Decode, in place, a NUL-terminated buffer containing data encoded
 * according to RFC 2047. This only handles content-transfer-encoding;
 * conversion between character sets is not implemented. In other
 * words: We assume the charsets used can be displayed by your mail
 * program without problems.
 *
 * hdr: buffer to decode; it is overwritten with the decoded text and
 *      re-terminated.
 */
void UnMimeHeader(unsigned char *hdr)
{
    /* Note: Decoding is done "in-situ", i.e. without using an
     * additional buffer for temp. storage. This is possible, since the
     * decoded string will always be shorter than the encoded string,
     * due to the encoding scheme.
     */

    int state = S_COPY_PLAIN;
    unsigned char *p_in, *p_out, *p;
    unsigned char enc = '\0';   /* initialization pacifies -Wall */
    size_t len;

    /* Speed up in case this is not a MIME-encoded header */
    if (strstr((char *)hdr, MIMEHDR_INIT) == NULL)
        return;   /* No MIME header */

    /* Loop through the buffer.
     *    p_in : Next char to be processed.
     *    p_out: Where to put the next processed char
     *    enc  : Encoding used (usually, 'q' = quoted-printable)
     */
    for (p_out = p_in = hdr; (*p_in); ) {
        switch (state) {
        case S_COPY_PLAIN:
            p = (unsigned char *)strstr((char *)p_in, MIMEHDR_INIT);
            if (p == NULL) {
                /*
                 * No more coded data in buffer,
                 * just move remainder into place.
                 */
                len = strlen((char *)p_in);   /* How much left */
                memmove(p_out, p_in, len);
                p_in += len;
                p_out += len;
            }
            else {
                /* MIME header init found at location p */
                if (p > p_in) {
                    /* There are some uncoded chars at the beginning. */
                    len = (size_t)(p - p_in);
                    memmove(p_out, p_in, len);
                    p_out += len;
                }
                p_in = p + (sizeof(MIMEHDR_INIT) - 1);
                state = S_SKIP_MIMEINIT;
            }
            break;

        case S_SKIP_MIMEINIT:
            /* Mime type definition: "charset?encoding?" */
            p = (unsigned char *)strchr((char *)p_in, '?');

            /*
             * p_in .. (p-1) holds the charset.
             * *(p+1) is the transfer encoding, *(p+2) must be a '?'.
             * *(p+1) is tested first: if it is the terminator, *(p+2)
             * lies outside the string and must not be read.
             */
            if ((p != NULL) && (p[1] != '\0') && (p[2] == '?')) {
                enc = (unsigned char)tolower(p[1]);
                p_in = p + 3;
                state = S_COPY_MIME;
            }
            else
                state = S_COPY_PLAIN;   /* Invalid data */
            break;

        case S_COPY_MIME:
            /* Find end of coded data */
            p = (unsigned char *)strstr((char *)p_in, MIMEHDR_END);
            if (p == NULL)
                p = p_in + strlen((char *)p_in);

            while (p_in < p) {
                /* Decode all encoded data */
                if (enc == 'q') {
                    if (*p_in == '=') {
                        /*
                         * Decode one char qp-coded at (p_in+1) and (p_in+2).
                         * Only look at p_in[2] when p_in[1] is not the
                         * terminator, so an encoded word ending in a lone
                         * '=' never causes a read past the buffer.
                         */
                        if ((p_in[1] != '\0') &&
                            (qp_char(p_in[1], p_in[2], p_out) == 0))
                            p_in += 3;
                        else {
                            /* Invalid QP data - pass through unchanged. */
                            *p_out = *p_in;
                            p_in++;
                        }
                    }
                    else if (*p_in == '_') {
                        /*
                         * RFC 2047: '_' inside encoded word represents 0x20.
                         * NOT a space - always the value 0x20.
                         */
                        *p_out = 0x20;
                        p_in++;
                    }
                    else {
                        /* Copy unchanged */
                        *p_out = *p_in;
                        p_in++;
                    }
                    p_out++;
                }
                else if (enc == 'b') {
                    /* Decode base64 encoded data */
                    char delimsave;
                    int decoded_count;

                    /*
                     * Temporarily put a '\r' at the end of the coded data
                     * for the duration of the from64tobits() call, then
                     * restore the original character.
                     */
                    delimsave = *p;
                    *p = '\r';
                    decoded_count = from64tobits(p_out, p_in, 0);
                    *p = delimsave;
                    if (decoded_count > 0)
                        p_out += decoded_count;
                    p_in = p;
                }
                else {
                    /* Unknown encoding: copy unchanged */
                    *p_out = *p_in;
                    p_in++;
                    p_out++;
                }
            }
            if (*p_in)
                p_in += (sizeof(MIMEHDR_END) - 1);   /* Skip the MIMEHDR_END delimiter */

            /*
             * We've completed decoding one encoded sequence. But another
             * may follow immediately, in which case whitespace before the
             * new MIMEHDR_INIT delimiter must be discarded.
             * See if that is the case
             */
            p = (unsigned char *)strstr((char *)p_in, MIMEHDR_INIT);
            state = S_COPY_PLAIN;
            if (p != NULL) {
                /*
                 * There is more MIME data later on. Is there
                 * whitespace only before the delimiter?
                 */
                unsigned char *q;
                int wsp_only = 1;

                for (q = p_in; (wsp_only && (q < p)); q++)
                    wsp_only = isspace(*q);

                if (wsp_only) {
                    /*
                     * Whitespace-only before the MIME delimiter. OK,
                     * just advance p_in to past the new MIMEHDR_INIT,
                     * and prepare to process the new MIME charset/encoding
                     * header.
                     */
                    p_in = p + sizeof(MIMEHDR_INIT) - 1;
                    state = S_SKIP_MIMEINIT;
                }
            }
            break;
        }
    }

    *p_out = '\0';
}


/*
 * Routines for decoding body-parts of a message.
 *
 * Since the "fetch" part of fetchmail gets a message body
 * one line at a time, we need to maintain some state variables
 * across multiple invocations of the UnMimeBodyline() routine.
 * The driver routine should call MimeBodyType() when all
 * headers have been received, and then UnMimeBodyline() for
 * every line in the message body.
 *
 */
#define S_BODY_DATA 0
#define S_BODY_HDR  1

/*
 * Flag indicating if we are currently processing
 * the headers or the body of a (multipart) message.
 */
static int BodyState = S_BODY_DATA;

/*
 * Flag indicating if we are in the process of decoding
 * a quoted-printable body part.
 */
static int CurrEncodingIsQP = 0;
static int CurrTypeNeedsDecode = 0;

/*
 * Delimiter for multipart messages.
RFC 2046 states that this must * NEVER be longer than 70 characters. Add 3 for the two hyphens * at the beginning, and a terminating null. */ #define MAX_DELIM_LEN 70 static unsigned char MultipartDelimiter[MAX_DELIM_LEN+3]; /* This string replaces the "Content-Transfer-Encoding: quoted-printable" * string in all headers, including those in body-parts. The replacement * must be no longer than the original string. */ static const char ENC8BIT[] = "Content-Transfer-Encoding: 8bit"; static void SetEncoding8bit(unsigned char *XferEncOfs) { unsigned char *p; if (XferEncOfs != NULL) { memcpy(XferEncOfs, ENC8BIT, sizeof(ENC8BIT) - 1); /* If anything left, in this header, replace with whitespace */ for (p=XferEncOfs+sizeof(ENC8BIT)-1; (*p >= ' '); p++) *p=' '; } } static char *GetBoundary(char *CntType) { char *p1, *p2; int flag; /* Find the "boundary" delimiter. It must be preceded with a ';' * and optionally some whitespace. */ p1 = CntType; do { p2 = strchr(p1, ';'); if (p2) for (p2++; isspace(*p2); p2++); p1 = p2; } while ((p1) && (strncasecmp(p1, "boundary", 8) != 0)); if (p1 == NULL) /* No boundary delimiter */ return NULL; /* Skip "boundary", whitespace and '='; check that we do have a '=' */ for (p1+=8, flag=0; (isspace(*p1) || (*p1 == '=')); p1++) flag |= (*p1 == '='); if (!flag) return NULL; /* Find end of boundary delimiter string */ if (*p1 == '\"') { /* The delimiter is inside quotes */ p1++; p2 = strchr(p1, '\"'); if (p2 == NULL) return NULL; /* No closing '"' !?! */ } else { /* There might be more text after the "boundary" string. */ p2 = strchr(p1, ';'); /* Safe - delimiter with ';' must be in quotes */ } /* Zero-terminate the boundary string */ if (p2 != NULL) *p2 = '\0'; return (p1 && strlen(p1)) ? p1 : NULL; } int CheckContentType(char *CntType) { /* * Static array of Content-Type's for which we will do * quoted-printable decoding, if requested. * It is probably wise to do this only on known text-only types; * be really careful if you change this. 
*/ static char *DecodedTypes[] = { "text/", /* Will match ALL content-type's starting with 'text/' */ "message/rfc822", NULL }; char *p = CntType; int i; /* If no Content-Type header, it isn't MIME - don't touch it */ if (CntType == NULL) return 0; /* Skip whitespace, if any */ for (; isspace(*p); p++) ; for (i=0; (DecodedTypes[i] && (strncasecmp(p, DecodedTypes[i], strlen(DecodedTypes[i])))); i++) ; return (DecodedTypes[i] != NULL); } /* * This routine does three things: * 1) It determines - based on the message headers - whether the * message body is a MIME message that may hold 8 bit data. * - A message that has a "quoted-printable" or "8bit" transfer * encoding is assumed to contain 8-bit data (when decoded). * - A multipart message is assumed to contain 8-bit data * when decoded (there might be quoted-printable body-parts). * - All other messages are assumed NOT to include 8-bit data. * 2) It determines the delimiter-string used in multi-part message * bodies. * 3) It sets the initial values of the CurrEncodingIsQP, * CurrTypeNeedsDecode, and BodyState variables, from the header * contents. * * The return value is a bitmask. */ int MimeBodyType(unsigned char *hdrs, int WantDecode) { unsigned char *NxtHdr = hdrs; unsigned char *XferEnc, *XferEncOfs, *CntType, *MimeVer, *p; int HdrsFound = 0; /* We only look for three headers */ int BodyType; /* Return value */ /* Setup for a standard (no MIME, no QP, 7-bit US-ASCII) message */ MultipartDelimiter[0] = '\0'; CurrEncodingIsQP = CurrTypeNeedsDecode = 0; BodyState = S_BODY_DATA; BodyType = 0; /* Just in case ... */ if (hdrs == NULL) return BodyType; XferEnc = XferEncOfs = CntType = MimeVer = NULL; do { if (strncasecmp("Content-Transfer-Encoding:", NxtHdr, 26) == 0) { XferEncOfs = NxtHdr; p = nxtaddr(NxtHdr); if (p != NULL) { xalloca(XferEnc, char *, strlen(p) + 1); strcpy(XferEnc, p); HdrsFound++; } } else if (strncasecmp("Content-Type:", NxtHdr, 13) == 0) { /* * This one is difficult. 
We cannot use the standard * nxtaddr() routine, since the boundary-delimiter is * (probably) enclosed in quotes - and thus appears * as an rfc822 comment, and nxtaddr() "eats" up any * spaces in the delimiter. So, we have to do this * by hand. */ /* Skip the "Content-Type:" part and whitespace after it */ for (NxtHdr += 13; ((*NxtHdr == ' ') || (*NxtHdr == '\t')); NxtHdr++); /* * Get the full value of the Content-Type header; * it might span multiple lines. So search for * a newline char, but ignore those that have a * have a TAB or space just after the NL (continued * lines). */ p = NxtHdr-1; do { p=strchr((p+1),'\n'); } while ( (p != NULL) && ((*(p+1) == '\t') || (*(p+1) == ' ')) ); if (p == NULL) p = NxtHdr + strlen(NxtHdr); xalloca(CntType, char *, p-NxtHdr+2); strncpy(CntType, NxtHdr, (p-NxtHdr)); *(CntType+(p-NxtHdr)) = '\0'; HdrsFound++; } else if (strncasecmp("MIME-Version:", NxtHdr, 13) == 0) { p = nxtaddr(NxtHdr); if (p != NULL) { xalloca(MimeVer, char *, strlen(p) + 1); strcpy(MimeVer, p); HdrsFound++; } } NxtHdr = (strchr(NxtHdr, '\n')); if (NxtHdr != NULL) NxtHdr++; } while ((NxtHdr != NULL) && (*NxtHdr) && (HdrsFound != 3)); /* Done looking through the headers, now check what they say */ if ((MimeVer != NULL) && (strcmp(MimeVer, "1.0") == 0)) { CurrTypeNeedsDecode = CheckContentType(CntType); /* Check Content-Type to see if this is a multipart message */ if ( (CntType != NULL) && ((strncasecmp(CntType, "multipart/mixed", 16) == 0) || (strncasecmp(CntType, "message/", 8) == 0)) ) { char *p1 = GetBoundary(CntType); if (p1 != NULL) { /* The actual delimiter is "--" followed by the boundary string */ strcpy(MultipartDelimiter, "--"); strncat(MultipartDelimiter, p1, MAX_DELIM_LEN); BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE); } } /* * Check Content-Transfer-Encoding, but * ONLY for non-multipart messages (BodyType == 0). 
*/ if ((XferEnc != NULL) && (BodyType == 0)) { if (strcasecmp(XferEnc, "quoted-printable") == 0) { CurrEncodingIsQP = 1; BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE); if (WantDecode && CurrTypeNeedsDecode) { SetEncoding8bit(XferEncOfs); } } else if (strcasecmp(XferEnc, "7bit") == 0) { CurrEncodingIsQP = 0; BodyType = (MSG_IS_7BIT); } else if (strcasecmp(XferEnc, "8bit") == 0) { CurrEncodingIsQP = 0; BodyType = (MSG_IS_8BIT); } } } return BodyType; } /* * Decode one line of data containing QP data. * Return flag set if this line ends with a soft line-break. * 'bufp' is modified to point to the end of the output buffer. */ static int DoOneQPLine(unsigned char **bufp, flag delimited, flag issoftline) { unsigned char *buf = *bufp; unsigned char *p_in, *p_out, *p; int n; int ret = 0; /* * Special case: line consists of a single =2E and messages are * dot-terminated. Line has to be dot-stuffed after decoding. */ if (delimited && !issoftline && buf[0]=='=' && !strncmp(*bufp, "=2E\r\n", 5)) { strcpy(buf, "..\r\n"); *bufp += 5; return(FALSE); } p_in = buf; if (delimited && issoftline && (strncmp(buf, "..", 2) == 0)) p_in++; for (p_out = buf; (*p_in); ) { p = strchr(p_in, '='); if (p == NULL) { /* No more QP data, just move remainder into place */ n = strlen(p_in); memmove(p_out, p_in, n); p_in += n; p_out += n; } else { if (p > p_in) { /* There are some uncoded chars at the beginning. */ n = (p - p_in); memmove(p_out, p_in, n); p_out += n; } switch (*(p+1)) { case '\0': case '\r': case '\n': /* Soft line break, skip '=' */ p_in = p+1; if (*p_in == '\r') p_in++; if (*p_in == '\n') p_in++; ret = 1; break; default: /* There is a QP encoded byte */ if (qp_char(*(p+1), *(p+2), p_out) == 0) { p_in = p+3; } else { /* Invalid QP data - pass through unchanged. */ *p_out = '='; p_in = p+1; } p_out++; break; } } } *p_out = '\0'; *bufp = p_out; return ret; } /* This is called once per line in the message body. 
We need to scan
 * all lines in the message body for the multipart delimiter string,
 * and handle any body-part headers in such messages (these can toggle
 * qp-decoding on and off).
 *
 * Note: Messages that are NOT multipart-messages go through this
 * routine quickly, since BodyState will always be S_BODY_DATA,
 * and MultipartDelimiter is empty.
 *
 * Return flag set if this line ends with a soft line-break.
 * 'bufp' is modified to point to the end of the output buffer.
 *
 * delimited and softline are passed on unchanged to the
 * quoted-printable line decoder.
 */
int UnMimeBodyline(unsigned char **bufp, flag delimited, flag softline)
{
    unsigned char *buf = *bufp;
    int ret = 0;

    switch (BodyState) {
    case S_BODY_HDR:
        UnMimeHeader(buf);   /* Headers in body-parts can be encoded, too! */
        if ((*buf == '\0') || (*buf == '\n') || (strcmp(buf, "\r\n") == 0)) {
            /* Blank line: the body-part headers are over */
            BodyState = S_BODY_DATA;
        }
        else if (strncasecmp("Content-Transfer-Encoding:", buf, 26) == 0) {
            char *XferEnc;

            XferEnc = nxtaddr(buf);
            if ((XferEnc != NULL) && (strcasecmp(XferEnc, "quoted-printable") == 0)) {
                CurrEncodingIsQP = 1;

                /*
                 * Hmm ... we cannot be really sure that CurrTypeNeedsDecode
                 * has been set - we may not have seen the Content-Type header
                 * yet. But *usually* the Content-Type header comes first, so
                 * this will work. And there is really no way of doing it
                 * "right" as long as we stick with the line-by-line processing.
                 */
                if (CurrTypeNeedsDecode)
                    SetEncoding8bit(buf);
            }
        }
        else if (strncasecmp("Content-Type:", buf, 13) == 0) {
            CurrTypeNeedsDecode = CheckContentType(nxtaddr(buf));
        }

        *bufp = (buf + strlen(buf));
        break;

    case S_BODY_DATA:
        if ((*MultipartDelimiter) &&
            (strncmp(buf, MultipartDelimiter, strlen(MultipartDelimiter)) == 0)) {
            /* Delimiter line: a new body part (and its headers) starts */
            BodyState = S_BODY_HDR;
            CurrEncodingIsQP = CurrTypeNeedsDecode = 0;
        }

        if (CurrEncodingIsQP && CurrTypeNeedsDecode)
            ret = DoOneQPLine(bufp, delimited, softline);
        else
            *bufp = (buf + strlen(buf));
        break;
    }

    return ret;
}


#ifdef STANDALONE
#include <stdio.h>
#include <unistd.h>

char *program_name = "unmime";
int outlevel = 0;

#define BUFSIZE_INCREMENT 4096

#ifdef DEBUG
#define DBG_FWRITE(B,L,BS,FD) fwrite(B, L, BS, FD)
#else
#define DBG_FWRITE(B,L,BS,FD)
#endif

/*
 * Stand-alone filter: reads a message from stdin, decodes its headers
 * and (when MimeBodyType() says so) its body, and writes the result to
 * stdout. With DEBUG defined, the raw input and the converted output
 * are also written to /tmp/i_unmime.<pid> and /tmp/o_unmime.<pid>.
 */
int main(int argc, char *argv[])
{
    size_t BufSize;
    size_t used, out_len;
    unsigned char *buffer, *buf_p;
    int nl_count, bodytype;

#ifdef DEBUG
    pid_t pid;
    FILE *fd_orig, *fd_conv;
    char fnam[100];

    pid = getpid();
    snprintf(fnam, sizeof(fnam), "/tmp/i_unmime.%x", (unsigned int)pid);
    fd_orig = fopen(fnam, "w");
    snprintf(fnam, sizeof(fnam), "/tmp/o_unmime.%x", (unsigned int)pid);
    fd_conv = fopen(fnam, "w");
    if ((fd_orig == NULL) || (fd_conv == NULL)) {
        perror(fnam);
        return 1;
    }
#endif

    (void)argc;
    (void)argv;

    BufSize = BUFSIZE_INCREMENT;    /* Initial size of buffer */
    buf_p = buffer = (unsigned char *) xmalloc(BufSize);
    nl_count = 0;

    /*
     * Read the header block one byte at a time: it ends after two
     * consecutive newlines (CRs in between do not count) or at EOF.
     */
    do {
        if (fread(buf_p, 1, 1, stdin) != 1)
            break;   /* EOF or error: nothing was stored at *buf_p */

        switch (*buf_p) {
        case '\n':
            nl_count++;
            break;

        case '\r':
            break;

        default:
            nl_count = 0;
            break;
        }

        buf_p++;
        if ((size_t)(buf_p - buffer) == BufSize) {
            /* Buffer is full! Get more room. */
            buffer = (unsigned char *) xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
            buf_p = buffer + BufSize;
            BufSize += BUFSIZE_INCREMENT;
        }
    } while (nl_count < 2);

    *buf_p = '\0';
    DBG_FWRITE(buffer, strlen((char *)buffer), 1, fd_orig);

    UnMimeHeader(buffer);
    bodytype = MimeBodyType(buffer, 1);

    out_len = strlen((char *)buffer);
    fwrite(buffer, out_len, 1, stdout);
    DBG_FWRITE(buffer, out_len, 1, fd_conv);

    /* Process the body one line at a time until no more input arrives */
    do {
        used = 0;
        for (;;) {
            /* Keep room for the next byte plus the terminating NUL */
            if (used + 1 >= BufSize) {
                buffer = (unsigned char *) xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
                BufSize += BUFSIZE_INCREMENT;
            }
            if (fread(buffer + used, 1, 1, stdin) != 1)
                break;
            if (buffer[used++] == '\n')
                break;
        }
        buffer[used] = '\0';
        DBG_FWRITE(buffer, used, 1, fd_orig);

        if (used > 0) {
            buf_p = buffer + used;
            if (bodytype & MSG_NEEDS_DECODE) {
                buf_p = buffer;
                UnMimeBodyline(&buf_p, 0, 0);
            }
            /* A line may decode to nothing (bare soft line-break) */
            out_len = (size_t)(buf_p - buffer);
            if (out_len > 0) {
                fwrite(buffer, out_len, 1, stdout);
                DBG_FWRITE(buffer, out_len, 1, fd_conv);
            }
        }
        /* Stop on input exhaustion, not on an empty decoded line */
    } while (used > 0);

    free(buffer);
    fflush(stdout);

#ifdef DEBUG
    fclose(fd_orig);
    fclose(fd_conv);
#endif

    return 0;
}
#endif