diff options
Diffstat (limited to 'unmime.c')
-rw-r--r-- | unmime.c | 652 |
1 files changed, 652 insertions, 0 deletions
diff --git a/unmime.c b/unmime.c new file mode 100644 index 00000000..e1690c27 --- /dev/null +++ b/unmime.c @@ -0,0 +1,652 @@ +/* + * MIME mail decoding. + * + * This module contains decoding routines for converting + * quoted-printable data into pure 8-bit data, in MIME + * formatted messages. + * + * By Henrik Storner <storner@image.dk> + * + * Configuration file support for fetchmail 4.3.8 by + * Frank Damgaard <frda@post3.tele.dk> + * + */ + +#include <string.h> +#include <stdlib.h> +#include <ctype.h> +#include "fetchmail.h" + +static inline unsigned char unhex(unsigned char c) +{ + if ((c >= '0') && (c <= '9')) + return (c - '0'); + else if ((c >= 'A') && (c <= 'F')) + return (c - 'A' + 10); + else if ((c >= 'a') && (c <= 'f')) + return (c - 'a' + 10); + else + return c; +} + +static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out) +{ + c1 = unhex(c1); + c2 = unhex(c2); + + if ((c1 > 15) || (c2 > 15)) + return 1; + else { + *c_out = 16*c1+c2; + return 0; + } +} + + + +/* + * Routines to decode MIME QP-encoded headers, as per RFC 2047. + */ + +/* States of the decoding state machine */ +#define S_COPY_PLAIN 0 /* Just copy, but watch for the QP flag */ +#define S_SKIP_MIMEINIT 1 /* Get the encoding, and skip header */ +#define S_COPY_MIME 2 /* Decode a sequence of coded characters */ + +static const char MIMEHDR_INIT[] = "=?"; /* Start of coded sequence */ +static const char MIMEHDR_END[] = "?="; /* End of coded sequence */ + +void UnMimeHeader(unsigned char *hdr) +{ + /* Decode a buffer containing data encoded according to RFC + * 2047. This only handles content-transfer-encoding; conversion + * between character sets is not implemented. In other words: We + * assume the charsets used can be displayed by your mail program + * without problems. + */ + + /* Note: Decoding is done "in-situ", i.e. without using an + * additional buffer for temp. storage. This is possible, since the + * decoded string will always be shorter than the encoded string, + * due to the en- coding scheme. + */ + + int state = S_COPY_PLAIN; + unsigned char *p_in, *p_out, *p; + unsigned char enc; + int i; + + /* Speed up in case this is not a MIME-encoded header */ + p = strstr(hdr, MIMEHDR_INIT); + if (p == NULL) + return; /* No MIME header */ + + /* Loop through the buffer. + * p_in : Next char to be processed. + * p_out: Where to put the next processed char + * enc : Encoding used (usually, 'q' = quoted-printable) + */ + for (p_out = p_in = hdr; (*p_in); ) { + switch (state) { + case S_COPY_PLAIN: + p = strstr(p_in, MIMEHDR_INIT); + if (p == NULL) { + /* + * No more coded data in buffer, + * just move remainder into place. + */ + i = strlen(p_in); /* How much left */ + memmove(p_out, p_in, i); + p_in += i; p_out += i; + } + else { + /* MIME header init found at location p */ + if (p > p_in) { + /* There are some uncoded chars at the beginning. */ + i = (p - p_in); + memmove(p_out, p_in, i); + p_out += i; + } + p_in = (p + 2); + state = S_SKIP_MIMEINIT; + } + break; + + case S_SKIP_MIMEINIT: + /* Mime type definition: "charset?encoding?" */ + p = strchr(p_in, '?'); + if (p != NULL) { + /* p_in .. (p-1) holds the charset */ + + /* *(p+1) is the transfer encoding, *(p+2) must be a '?' */ + if (*(p+2) == '?') { + enc = tolower(*(p+1)); + p_in = p+3; + state = S_COPY_MIME; + } + else + state = S_COPY_PLAIN; + } + else + state = S_COPY_PLAIN; /* Invalid data */ + break; + + case S_COPY_MIME: + p = strstr(p_in, MIMEHDR_END); /* Find end of coded data */ + if (p == NULL) p = p_in + strlen(p_in); + for (; (p_in < p); ) { + /* Decode all encoded data */ + if (enc == 'q') { + if (*p_in == '=') { + /* Decode one char qp-coded at (p_in+1) and (p_in+2) */ + if (qp_char(*(p_in+1), *(p_in+2), p_out) == 0) + p_in += 3; + else { + /* Invalid QP data - pass through unchanged. */ + *p_out = *p_in; + p_in++; + } + } + else if (*p_in == '_') { + /* + * RFC 2047: '_' inside encoded word represents 0x20. + * NOT a space - always the value 0x20. + */ + *p_out = 0x20; + p_in++; + } + else { + /* Copy unchanged */ + *p_out = *p_in; + p_in++; + } + p_out++; + } + else if (enc == 'b') { + /* Decode base64 encoded data */ + char delimsave; + int decoded_count; + + delimsave = *p; *p = '\r'; + decoded_count = from64tobits(p_out, p_in); + *p = delimsave; + if (decoded_count > 0) + p_out += decoded_count; + p_in = p; + } + else { + /* Copy unchanged */ + *p_out = *p_in; + p_in++; + p_out++; + } + } + if (*p_in) + p_in += 2; /* Skip the MIMEHDR_END delimiter */ + + /* + * We've completed decoding one encoded sequence. But another + * may follow immediately, in which case whitespace before the + * new MIMEHDR_INIT delimiter must be discarded. + * See if that is the case + */ + p = strstr(p_in, MIMEHDR_INIT); + state = S_COPY_PLAIN; + if (p != NULL) { + /* + * There is more MIME data later on. Is there + * whitespace only before the delimiter? + */ + unsigned char *q; + int wsp_only = 1; + + for (q=p_in; (wsp_only && (q < p)); q++) + wsp_only = isspace(*q); + + if (wsp_only) { + /* + * Whitespace-only before the MIME delimiter. OK, + * just advance p_in to past the new MIMEHDR_INIT, + * and prepare to process the new MIME charset/encoding + * header. + */ + p_in = p + strlen(MIMEHDR_INIT); + state = S_SKIP_MIMEINIT; + } + } + break; + } + } + + *p_out = '\0'; +} + + + +/* + * Routines for decoding body-parts of a message. + * + * Since the "fetch" part of fetchmail gets a message body + * one line at a time, we need to maintain some state variables + * across multiple invokations of the UnMimeBodyline() routine. + * The driver routine should call MimeBodyType() when all + * headers have been received, and then UnMimeBodyline() for + * every line in the message body. + * + */ +#define S_BODY_DATA 0 +#define S_BODY_HDR 1 + +/* + * Flag indicating if we are currently processing + * the headers or the body of a (multipart) message. + */ +static int BodyState = S_BODY_DATA; + +/* + * Flag indicating if we are in the process of decoding + * a quoted-printable body part. + */ +static int CurrEncodingIsQP = 0; + +/* + * Delimiter for multipart messages. RFC 2046 states that this must + * NEVER be longer than 70 characters. Add 3 for the two hyphens + * at the beginning, and a terminating null. + */ +#define MAX_DELIM_LEN 70 +static unsigned char MultipartDelimiter[MAX_DELIM_LEN+3]; + + +/* This string replaces the "Content-Transfer-Encoding: quoted-printable" + * string in all headers, including those in body-parts. It must be + * no longer than the original string. + */ +static const char ENC8BIT[] = "Content-Transfer-Encoding: 8bit"; +static void SetEncoding8bit(unsigned char *XferEncOfs) +{ + unsigned char *p; + + if (XferEncOfs != NULL) { + memcpy(XferEncOfs, ENC8BIT, strlen(ENC8BIT)); + + /* If anything left, in this header, replace with whitespace */ + for (p=XferEncOfs+strlen(ENC8BIT); (*p >= ' '); p++) *p=' '; + } +} + + +/* + * This routine does three things: + * 1) It determines - based on the message headers - whether the + * message body is a MIME message that may hold 8 bit data. + * - A message that has a "quoted-printable" or "8bit" transfer + * encoding is assumed to contain 8-bit data (when decoded). + * - A multipart message is assumed to contain 8-bit data + * when decoded (there might be quoted-printable body-parts). + * - All other messages are assumed NOT to include 8-bit data. + * 2) It determines the delimiter-string used in multi-part message + * bodies. + * 3) It sets the initial values of the CurrEncodingIsQP and BodyState + * variables, from the header contents. + * + * The return value is a bitmask. + */ +int MimeBodyType(unsigned char *hdrs) +{ + unsigned char *NxtHdr = hdrs; + unsigned char *XferEnc, *XferEncOfs, *CntType, *MimeVer, *p; + int HdrsFound = 0; /* We only look for three headers */ + int BodyType; /* Return value */ + + /* Setup for a standard (no MIME, no QP, 7-bit US-ASCII) message */ + MultipartDelimiter[0] = '\0'; + CurrEncodingIsQP = 0; + BodyState = S_BODY_DATA; + BodyType = 0; + + /* Just in case ... */ + if (hdrs == NULL) + return BodyType; + + XferEnc = XferEncOfs = CntType = MimeVer = NULL; + + do { + if (strncasecmp("Content-Transfer-Encoding:", NxtHdr, 26) == 0) { + XferEncOfs = NxtHdr; + p = nxtaddr(NxtHdr); + if (p != NULL) { + XferEnc = (char *)xmalloc(strlen(p) + 1); + strcpy(XferEnc, p); + HdrsFound++; + } + } + else if (strncasecmp("Content-Type:", NxtHdr, 13) == 0) { + /* + * This one is difficult. We cannot use the standard + * nxtaddr() routine, since the boundary-delimiter is + * (probably) enclosed in quotes - and thus appears + * as an rfc822 comment, and nxtaddr() "eats" up any + * spaces in the delimiter. So, we have to do this + * by hand. + */ + + /* Skip the "Content-Type:" part and whitespace after it */ + for (NxtHdr += 13; ((*NxtHdr == ' ') || (*NxtHdr == '\t')); NxtHdr++); + + /* + * Get the full value of the Content-Type header; + * it might span multiple lines. So search for + * a newline char, but ignore those that have a + * have a TAB or space just after the NL (continued + * lines). + */ + p = NxtHdr-1; + do { + p=strchr((p+1),'\n'); + } while ( (p != NULL) && ((*(p+1) == '\t') || (*(p+1) == ' ')) ); + if (p == NULL) p = NxtHdr + strlen(NxtHdr); + + CntType = (char *)xmalloc(p-NxtHdr+2); + strncpy(CntType, NxtHdr, (p-NxtHdr)); + *(CntType+(p-NxtHdr)) = '\0'; + HdrsFound++; + } + else if (strncasecmp("MIME-Version:", NxtHdr, 13) == 0) { + p = nxtaddr(NxtHdr); + if (p != NULL) { + MimeVer = (char *)xmalloc(strlen(p) + 1); + strcpy(MimeVer, p); + HdrsFound++; + } + } + + NxtHdr = (strchr(NxtHdr, '\n')); + if (NxtHdr != NULL) NxtHdr++; + } while ((NxtHdr != NULL) && (*NxtHdr) && (HdrsFound != 3)); + + + /* Done looking through the headers, now check what they say */ + if ((MimeVer != NULL) && (strcmp(MimeVer, "1.0") == 0)) { + + /* Check Content-Type to see if this is a multipart message */ + if (CntType != NULL) { + if ((strncasecmp(CntType, "multipart/", 10) == 0) || + (strncasecmp(CntType, "message/", 8) == 0)) { + + char *p1, *p2; + + /* Search for "boundary=" */ + p1 = strchr(CntType, '='); + if (p1 != NULL) { + /* Skip the '=' and any whitespace after it */ + for (p1++; (isspace(*p1)); p1++); + + /* The delimiter might be inside quotes */ + if (*p1 == '\"') { + p1++; + p2 = strchr(p1, '\"'); + if (p2 != NULL) + *p2 = '\0'; + } + + if (strlen(p1) > 0) { + /* The actual delimiter is "--" followed by + the boundary string */ + strcpy(MultipartDelimiter, "--"); + strncat(MultipartDelimiter, p1, MAX_DELIM_LEN); + BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE); + } + } + } + } + + /* + * Check Content-Transfer-Encoding, but + * ONLY for non-multipart messages (BodyType == 0). + */ + if ((XferEnc != NULL) && (BodyType == 0)) { + if (strcasecmp(XferEnc, "quoted-printable") == 0) { + CurrEncodingIsQP = 1; + BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE); + SetEncoding8bit(XferEncOfs); + } + else if (strcasecmp(XferEnc, "7bit") == 0) { + CurrEncodingIsQP = 0; + BodyType = (MSG_IS_7BIT); + } + else if (strcasecmp(XferEnc, "8bit") == 0) { + CurrEncodingIsQP = 0; + BodyType = (MSG_IS_8BIT); + } + } + + } + + if (MimeVer) free(MimeVer); + if (XferEnc) free(XferEnc); + if (CntType) free(CntType); + + return BodyType; +} + + +/* + * Decode one line of data containing QP data. + * Return flag set if this line ends with a soft line-break. + * 'bufp' is modified to point to the end of the output buffer. + */ +static int DoOneQPLine(unsigned char **bufp, int collapsedoubledot) +{ + unsigned char *buf = *bufp; + unsigned char *p_in, *p_out, *p; + int n; + int ret = 0; + + p_in = buf; + if (collapsedoubledot && (strncmp(buf, "..", 2) == 0)) + p_in++; + + for (p_out = buf; (*p_in); ) { + p = strchr(p_in, '='); + if (p == NULL) { + /* No more QP data, just move remainder into place */ + n = strlen(p_in); + memmove(p_out, p_in, n); + p_in += n; p_out += n; + } + else { + if (p > p_in) { + /* There are some uncoded chars at the beginning. */ + n = (p - p_in); + memmove(p_out, p_in, n); + p_out += n; + } + + switch (*(p+1)) { + case '\0': case '\r': case '\n': + /* Soft line break, skip '=' */ + p_in = p+1; + if (*p_in == '\r') p_in++; + if (*p_in == '\n') p_in++; + ret = 1; + break; + + default: + /* There is a QP encoded byte */ + if (qp_char(*(p+1), *(p+2), p_out) == 0) { + p_in = p+3; + } + else { + /* Invalid QP data - pass through unchanged. */ + *p_out = '='; + p_in = p+1; + } + p_out++; + break; + } + } + } + + *p_out = '\0'; + *bufp = p_out; + return ret; +} + + +/* This is called once per line in the message body. We need to scan + * all lines in the message body for the multipart delimiter string, + * and handle any body-part headers in such messages (these can toggle + * qp-decoding on and off). + * + * Note: Messages that are NOT multipart-messages go through this + * routine quickly, since BodyState will always be S_BODY_DATA, + * and MultipartDelimiter is NULL. + * + * Return flag set if this line ends with a soft line-break. + * 'bufp' is modified to point to the end of the output buffer. + */ + +int UnMimeBodyline(unsigned char **bufp, int collapsedoubledot) +{ + unsigned char *buf = *bufp; + int ret = 0; + + switch (BodyState) { + case S_BODY_HDR: + UnMimeHeader(buf); /* Headers in body-parts can be encoded, too! */ + if (strncasecmp("Content-Transfer-Encoding:", buf, 26) == 0) { + char *XferEnc; + + XferEnc = nxtaddr(buf); + if ((XferEnc != NULL) && (strcasecmp(XferEnc, "quoted-printable") == 0)) { + CurrEncodingIsQP = 1; + SetEncoding8bit(buf); + } + } + else if ((*buf == '\0') || (*buf == '\n') || (strcmp(buf, "\r\n") == 0)) + BodyState = S_BODY_DATA; + + *bufp = (buf + strlen(buf)); + break; + + case S_BODY_DATA: + if ((*MultipartDelimiter) && + (strncmp(buf, MultipartDelimiter, strlen(MultipartDelimiter)) == 0)) { + BodyState = S_BODY_HDR; + CurrEncodingIsQP = 0; + } + + if (CurrEncodingIsQP) + ret = DoOneQPLine(bufp, collapsedoubledot); + else + *bufp = (buf + strlen(buf)); + break; + } + + return ret; +} + + +#ifdef STANDALONE +#include <stdio.h> +#include <unistd.h> + +char *program_name = "unmime"; + +#define BUFSIZE_INCREMENT 4096 + +#ifdef DEBUG +#define DBG_FWRITE(B,L,BS,FD) fwrite(B, L, BS, FD) +#else +#define DBG_FWRITE(B,L,BS,FD) +#endif + +int main(int argc, char *argv[]) +{ + unsigned int BufSize; + unsigned char *buffer, *buf_p; + int nl_count, i, bodytype; + +#ifdef DEBUG + pid_t pid; + FILE *fd_orig, *fd_conv; + char fnam[100]; + + pid = getpid(); + sprintf(fnam, "/tmp/i_unmime.%x", pid); + fd_orig = fopen(fnam, "w"); + sprintf(fnam, "/tmp/o_unmime.%x", pid); + fd_conv = fopen(fnam, "w"); +#endif + + BufSize = BUFSIZE_INCREMENT; /* Initial size of buffer */ + buf_p = buffer = (unsigned char *) xmalloc(BufSize); + nl_count = 0; + + do { + i = fread(buf_p, 1, 1, stdin); + switch (*buf_p) { + case '\n': + nl_count++; + break; + + case '\r': + break; + + default: + nl_count = 0; + break; + } + + buf_p++; + if ((buf_p - buffer) == BufSize) { + /* Buffer is full! Get more room. */ + buffer = xrealloc(buffer, BufSize+BUFSIZE_INCREMENT); + buf_p = buffer + BufSize; + BufSize += BUFSIZE_INCREMENT; + } + } while ((i > 0) && (nl_count < 2)); + + *buf_p = '\0'; + DBG_FWRITE(buffer, strlen(buffer), 1, fd_orig); + + UnMimeHeader(buffer); + bodytype = MimeBodyType(buffer); + + i = strlen(buffer); + fwrite(buffer, i, 1, stdout); + DBG_FWRITE(buffer, i, 1, fd_conv); + + do { + buf_p = (buffer - 1); + do { + buf_p++; + i = fread(buf_p, 1, 1, stdin); + } while ((i == 1) && (*buf_p != '\n')); + if (i == 1) buf_p++; + *buf_p = '\0'; + DBG_FWRITE(buf, (buf_p - buffer), 1, fd_orig); + + if (buf_p > buffer) { + if (bodytype & MSG_NEEDS_DECODE) { + buf_p = buffer; + UnMimeBodyline(&buf_p, 0); + } + fwrite(buffer, (buf_p - buffer), 1, stdout); + DBG_FWRITE(buffer, (buf_p - buffer), 1, fd_conv); + } + } while (buf_p > buffer); + + free(buffer); + fflush(stdout); + +#ifdef DEBUG + fclose(fd_orig); + fclose(fd_conv); +#endif + + return 0; +} +#endif + |