Initial revision

svn path=/trunk/; revision=1728
author: Eric S. Raymond <esr@thyrsus.com> 1998-03-26 19:48:28 +0000
committer: Eric S. Raymond <esr@thyrsus.com> 1998-03-26 19:48:28 +0000
commit: f27916a7f27f00e1d2325538e55923c505706527 (patch)
tree: 22102adfe74bfcf871a1946d87d6e3161bb9ea49
parent: cacba6b362fb7907d1bb2d529e95504b27a00d25 (diff)
download: fetchmail-f27916a7f27f00e1d2325538e55923c505706527.tar.gz
fetchmail-f27916a7f27f00e1d2325538e55923c505706527.tar.bz2
fetchmail-f27916a7f27f00e1d2325538e55923c505706527.zip
1 files changed, 652 insertions, 0 deletions
diff --git a/unmime.c b/unmime.c
new file mode 100644
index 00000000..e1690c27
--- /dev/null
+++ b/unmime.c
@@ -0,0 +1,652 @@
+/*
+ * MIME mail decoding.
+ *
+ * This module contains decoding routines for converting
+ * quoted-printable data into pure 8-bit data, in MIME
+ * formatted messages.
+ *
+ * By Henrik Storner <storner@image.dk>
+ *
+ * Configuration file support for fetchmail 4.3.8 by 
+ * Frank Damgaard <frda@post3.tele.dk>
+ * 
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include "fetchmail.h"
+
+/*
+ * Convert one hexadecimal digit character to its numeric value.
+ *
+ * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f'.  Every other input
+ * yields 0xFF, which is above 15, so that a caller testing "> 15"
+ * reliably rejects it.  (Returning the input unchanged would make
+ * control characters such as NUL, TAB, LF and CR - whose values are
+ * all below 16 - look like valid hex digits.)
+ */
+static inline unsigned char unhex(unsigned char c)
+{
+  if ((c >= '0') && (c <= '9'))
+    return (c - '0');
+  if ((c >= 'A') && (c <= 'F'))
+    return (c - 'A' + 10);
+  if ((c >= 'a') && (c <= 'f'))
+    return (c - 'a' + 10);
+
+  return 0xFF;   /* Not a hex digit */
+}
+
+/*
+ * Decode one quoted-printable byte from its two digit characters.
+ * 'c1' supplies the high nibble and 'c2' the low nibble; both are
+ * converted with unhex().
+ *
+ * Returns 0 and stores the byte in *c_out when both converted values
+ * are in the range 0..15; otherwise returns 1 and leaves *c_out alone.
+ */
+static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out)
+{
+  unsigned char hi = unhex(c1);
+  unsigned char lo = unhex(c2);
+
+  if ((hi <= 15) && (lo <= 15)) {
+    *c_out = 16*hi+lo;
+    return 0;
+  }
+
+  return 1;
+}
+
+
+
+/*
+ * Routines to decode MIME QP-encoded headers, as per RFC 2047.
+ */
+
+/* States of the decoding state machine driven by UnMimeHeader() */
+#define S_COPY_PLAIN        0	/* Copy plain text while looking for MIMEHDR_INIT */
+#define S_SKIP_MIMEINIT     1	/* Skip "charset?", pick up the encoding letter */
+#define S_COPY_MIME         2	/* Decode coded characters up to MIMEHDR_END */
+
+static const char MIMEHDR_INIT[]  = "=?";	/* Marks the start of a coded sequence */
+static const char MIMEHDR_END[]   = "?=";	/* Marks the end of a coded sequence */
+
+/*
+ * Decode, in place, the RFC 2047 encoded-words ("=?charset?enc?text?=")
+ * found in the NUL-terminated header buffer 'hdr'.
+ *
+ * Only the transfer encoding is undone; conversion between character
+ * sets is not implemented.  In other words: we assume the charsets
+ * used can be displayed by your mail program without problems.
+ *   - enc 'q'/'Q': "=XX" hex escapes are decoded, '_' becomes 0x20.
+ *   - enc 'b'/'B': the text is handed to from64tobits().
+ *   - any other encoding letter: the text is copied unchanged (the
+ *     surrounding delimiters are still removed).
+ * Whitespace that separates two adjacent encoded-words is discarded.
+ * Text that starts with "=?" but is not followed by a well-formed
+ * "charset?enc?" introducer is passed through unchanged.
+ *
+ * Decoding is done "in-situ", i.e. without an additional buffer for
+ * temporary storage.  This is possible since the decoded string is
+ * never longer than the encoded string.  A NULL 'hdr' is ignored.
+ */
+void UnMimeHeader(unsigned char *hdr)
+{
+  int  state = S_COPY_PLAIN;
+  unsigned char *p_in, *p_out, *p;
+  unsigned char *p_raw;     /* Start of input skipped but not yet emitted */
+  unsigned char enc = 0;
+  size_t n;
+
+  if (hdr == NULL)
+    return;
+
+  /* Speed up in case this is not a MIME-encoded header */
+  if (strstr((char *)hdr, MIMEHDR_INIT) == NULL)
+    return;   /* No MIME header */
+
+  /* Loop through the buffer.
+   *  p_in : Next char to be processed.
+   *  p_out: Where to put the next processed char (always <= p_in)
+   *  p_raw: Where the text skipped on entry to S_SKIP_MIMEINIT began
+   *  enc  : Encoding used (usually, 'q' = quoted-printable)
+   */
+  p_raw = hdr;
+  for (p_out = p_in = hdr; (*p_in); ) {
+    switch (state) {
+    case S_COPY_PLAIN:
+      p = (unsigned char *)strstr((char *)p_in, MIMEHDR_INIT);
+      if (p == NULL) {
+        /* No more coded data in buffer, just move remainder into place. */
+        n = strlen((char *)p_in);
+        memmove(p_out, p_in, n);
+        p_in += n; p_out += n;
+      }
+      else {
+        /* MIME header init found at location p */
+        n = (size_t)(p - p_in);
+        if (n > 0) {
+          /* There are some uncoded chars at the beginning. */
+          memmove(p_out, p_in, n);
+          p_out += n;
+        }
+        p_raw = p;
+        p_in = p + strlen(MIMEHDR_INIT);
+        state = S_SKIP_MIMEINIT;
+      }
+      break;
+
+    case S_SKIP_MIMEINIT:
+      /* Mime type definition: "charset?encoding?" */
+      p = (unsigned char *)strchr((char *)p_in, '?');
+
+      /*
+       * p_in .. (p-1) holds the charset, *(p+1) is the transfer
+       * encoding and *(p+2) must be a '?'.  *(p+1) is checked for NUL
+       * first so that *(p+2) is never read beyond the terminator.
+       */
+      if ((p != NULL) && (*(p+1) != '\0') && (*(p+2) == '?')) {
+        enc = (unsigned char)tolower(*(p+1));
+        p_in = p+3;
+        state = S_COPY_MIME;
+      }
+      else {
+        /*
+         * Invalid data: this was not an encoded-word after all. Emit
+         * the text we skipped (the "=?" and any whitespace before it)
+         * so the header is not silently altered.
+         */
+        n = (size_t)(p_in - p_raw);
+        memmove(p_out, p_raw, n);
+        p_out += n;
+        state = S_COPY_PLAIN;
+      }
+      break;
+
+    case S_COPY_MIME:
+      p = (unsigned char *)strstr((char *)p_in, MIMEHDR_END);  /* Find end of coded data */
+      if (p == NULL) p = p_in + strlen((char *)p_in);
+
+      while (p_in < p) {
+        /* Decode all encoded data */
+        if (enc == 'q') {
+          /*
+           * A "=XX" escape is only decoded when both digits lie
+           * before the end of the coded data and really are hex
+           * digits; this keeps p_in from running past the terminator
+           * on a truncated escape.
+           */
+          if ((*p_in == '=') && ((p - p_in) >= 3) &&
+              isxdigit(*(p_in+1)) && isxdigit(*(p_in+2)) &&
+              (qp_char(*(p_in+1), *(p_in+2), p_out) == 0)) {
+            p_in += 3;
+          }
+          else if (*p_in == '_') {
+            /*
+             * RFC 2047: '_' inside encoded word represents 0x20.
+             * NOT a space - always the value 0x20.
+             */
+            *p_out = 0x20;
+            p_in++;
+          }
+          else {
+            /* Plain char or invalid QP data - pass through unchanged. */
+            *p_out = *p_in;
+            p_in++;
+          }
+          p_out++;
+        }
+        else if (enc == 'b') {
+          /* Decode base64 encoded data */
+          unsigned char delimsave;
+          int decoded_count;
+
+          /* Temporarily end the coded text with a CR for the decoder */
+          delimsave = *p; *p = '\r';
+          decoded_count = from64tobits(p_out, p_in);
+          *p = delimsave;
+          if (decoded_count > 0)
+            p_out += decoded_count;
+          p_in = p;
+        }
+        else {
+          /* Unknown encoding: copy unchanged */
+          *p_out = *p_in;
+          p_in++;
+          p_out++;
+        }
+      }
+      if (*p_in)
+        p_in += strlen(MIMEHDR_END);   /* Skip the MIMEHDR_END delimiter */
+
+      /*
+       * We've completed decoding one encoded sequence. But another
+       * may follow immediately, in which case whitespace before the
+       * new MIMEHDR_INIT delimiter must be discarded.
+       * See if that is the case.
+       */
+      p = (unsigned char *)strstr((char *)p_in, MIMEHDR_INIT);
+      state = S_COPY_PLAIN;
+      if (p != NULL) {
+        /*
+         * There is more MIME data later on. Is there
+         * whitespace only before the delimiter?
+         */
+        unsigned char *q;
+        int  wsp_only = 1;
+
+        for (q = p_in; (wsp_only && (q < p)); q++)
+          wsp_only = isspace(*q);
+
+        if (wsp_only) {
+          /*
+           * Whitespace-only before the MIME delimiter. OK, advance
+           * p_in past the new MIMEHDR_INIT and prepare to process the
+           * new charset/encoding introducer.  p_raw remembers the
+           * whitespace in case the introducer turns out to be invalid.
+           */
+          p_raw = p_in;
+          p_in = p + strlen(MIMEHDR_INIT);
+          state = S_SKIP_MIMEINIT;
+        }
+      }
+      break;
+    }
+  }
+
+  /* Buffer ended right after a "=?": keep the skipped text */
+  if (state == S_SKIP_MIMEINIT) {
+    n = (size_t)(p_in - p_raw);
+    memmove(p_out, p_raw, n);
+    p_out += n;
+  }
+
+  *p_out = '\0';
+}
+
+
+
+/*
+ * Routines for decoding body-parts of a message.
+ *
+ * Since the "fetch" part of fetchmail gets a message body
+ * one line at a time, we need to maintain some state variables
+ * across multiple invocations of the UnMimeBodyline() routine.
+ * The driver routine should call MimeBodyType() when all
+ * headers have been received, and then UnMimeBodyline() for
+ * every line in the message body.
+ *
+ */
+/* Values taken by BodyState */
+#define S_BODY_DATA 0
+#define S_BODY_HDR  1
+
+/*
+ * Flag indicating if we are currently processing
+ * the headers or the body of a (multipart) message.
+ * Reset by MimeBodyType(), switched by UnMimeBodyline().
+ */
+static int  BodyState = S_BODY_DATA;
+
+/*
+ * Flag indicating if we are in the process of decoding
+ * a quoted-printable body part.
+ */
+static int  CurrEncodingIsQP = 0;
+
+/*
+ * Delimiter for multipart messages. RFC 2046 states that this must
+ * NEVER be longer than 70 characters. Add 3 for the two hyphens
+ * at the beginning, and a terminating null.
+ * An empty string means that no delimiter is in effect.
+ */
+#define MAX_DELIM_LEN 70
+static unsigned char MultipartDelimiter[MAX_DELIM_LEN+3];
+
+
+/*
+ * Text written over a "Content-Transfer-Encoding: quoted-printable"
+ * header line, both in the message headers and in body-part headers.
+ * It must be no longer than the text it replaces.
+ */
+static const char ENC8BIT[] = "Content-Transfer-Encoding: 8bit";
+
+/*
+ * Overwrite the header starting at 'XferEncOfs' with ENC8BIT and blank
+ * out whatever follows on that line: every following byte is replaced
+ * by a space until a byte below ' ' (e.g. CR, LF or NUL) is reached.
+ * A NULL pointer is ignored.
+ */
+static void SetEncoding8bit(unsigned char *XferEncOfs)
+{
+  size_t enclen = strlen(ENC8BIT);
+  unsigned char *tail;
+
+  if (XferEncOfs == NULL)
+    return;
+
+  memcpy(XferEncOfs, ENC8BIT, enclen);
+
+  /* If anything is left in this header, replace it with whitespace */
+  tail = XferEncOfs + enclen;
+  while (*tail >= ' ') {
+    *tail = ' ';
+    tail++;
+  }
+}
+
+
+/*
+ * Locate the "boundary" parameter inside a Content-Type value.
+ * Parameters are expected to be introduced by ';'. Returns a pointer
+ * to the first character after "boundary=" (whitespace skipped), or
+ * NULL if there is no such parameter.
+ */
+static char *FindBoundaryValue(char *cnttype)
+{
+  char *p;
+
+  for (p = strchr(cnttype, ';'); p != NULL; p = strchr(p, ';')) {
+    /* Skip the ';' and any whitespace (incl. folded lines) after it */
+    for (p++; isspace((unsigned char)*p); p++);
+
+    if (strncasecmp(p, "boundary", 8) == 0) {
+      char *v = p + 8;
+
+      while (isspace((unsigned char)*v)) v++;
+      if (*v == '=') {
+        for (v++; isspace((unsigned char)*v); v++);
+        return v;
+      }
+    }
+  }
+
+  return NULL;
+}
+
+/*
+ * This routine does three things:
+ * 1) It determines - based on the message headers - whether the
+ *    message body is a MIME message that may hold 8 bit data.
+ *    - A message that has a "quoted-printable" or "8bit" transfer
+ *      encoding is assumed to contain 8-bit data (when decoded).
+ *    - A multipart message is assumed to contain 8-bit data
+ *      when decoded (there might be quoted-printable body-parts).
+ *    - All other messages are assumed NOT to include 8-bit data.
+ * 2) It determines the delimiter-string used in multi-part message
+ *    bodies, from the "boundary" parameter of the Content-Type header.
+ * 3) It sets the initial values of the CurrEncodingIsQP and BodyState
+ *    variables, from the header contents.
+ *
+ * 'hdrs' holds all message headers as one NUL-terminated string. A
+ * "Content-Transfer-Encoding: quoted-printable" header found in it is
+ * rewritten in place to "8bit" (non-multipart messages only).
+ *
+ * The return value is a bitmask built from MSG_IS_7BIT, MSG_IS_8BIT
+ * and MSG_NEEDS_DECODE; it is 0 when 'hdrs' is NULL, when there is no
+ * "MIME-Version: 1.0" header, or when nothing relevant was recognized.
+ */
+int MimeBodyType(unsigned char *hdrs)
+{
+  unsigned char *NxtHdr = hdrs;
+  unsigned char *XferEnc, *XferEncOfs, *CntType, *MimeVer, *p;
+  int  HdrsFound = 0;     /* We only look for three headers */
+  int  BodyType;          /* Return value */
+
+  /* Setup for a standard (no MIME, no QP, 7-bit US-ASCII) message */
+  MultipartDelimiter[0] = '\0';
+  CurrEncodingIsQP = 0;
+  BodyState = S_BODY_DATA;
+  BodyType = 0;
+
+  /* Just in case ... */
+  if (hdrs == NULL)
+    return BodyType;
+
+  XferEnc = XferEncOfs = CntType = MimeVer = NULL;
+
+  /*
+   * Walk the headers one line at a time. Should a header occur more
+   * than once, the last occurrence wins; the earlier copy is freed and
+   * the header is counted only once.
+   */
+  do {
+    if (strncasecmp("Content-Transfer-Encoding:", NxtHdr, 26) == 0) {
+      XferEncOfs = NxtHdr;
+      p = (unsigned char *)nxtaddr(NxtHdr);
+      if (p != NULL) {
+        if (XferEnc == NULL)
+          HdrsFound++;
+        else
+          free(XferEnc);
+        XferEnc = (unsigned char *)xmalloc(strlen((char *)p) + 1);
+        strcpy((char *)XferEnc, (char *)p);
+      }
+    }
+    else if (strncasecmp("Content-Type:", NxtHdr, 13) == 0) {
+      /*
+       * This one is difficult. We cannot use the standard
+       * nxtaddr() routine, since the boundary-delimiter is
+       * (probably) enclosed in quotes - and thus appears
+       * as an rfc822 comment, and nxtaddr() "eats" up any
+       * spaces in the delimiter. So, we have to do this
+       * by hand.
+       */
+
+      /* Skip the "Content-Type:" part and whitespace after it */
+      for (NxtHdr += 13; ((*NxtHdr == ' ') || (*NxtHdr == '\t')); NxtHdr++);
+
+      /*
+       * Get the full value of the Content-Type header;
+       * it might span multiple lines. So search for
+       * a newline char, but ignore those that have a
+       * TAB or space just after the NL (continued lines).
+       */
+      p = NxtHdr-1;
+      do {
+        p = (unsigned char *)strchr((char *)(p+1), '\n');
+      } while ( (p != NULL) && ((*(p+1) == '\t') || (*(p+1) == ' ')) );
+      if (p == NULL) p = NxtHdr + strlen((char *)NxtHdr);
+
+      if (CntType == NULL)
+        HdrsFound++;
+      else
+        free(CntType);
+      CntType = (unsigned char *)xmalloc(p-NxtHdr+2);
+      memcpy(CntType, NxtHdr, (size_t)(p-NxtHdr));
+      *(CntType+(p-NxtHdr)) = '\0';
+    }
+    else if (strncasecmp("MIME-Version:", NxtHdr, 13) == 0) {
+      p = (unsigned char *)nxtaddr(NxtHdr);
+      if (p != NULL) {
+        if (MimeVer == NULL)
+          HdrsFound++;
+        else
+          free(MimeVer);
+        MimeVer = (unsigned char *)xmalloc(strlen((char *)p) + 1);
+        strcpy((char *)MimeVer, (char *)p);
+      }
+    }
+
+    /* Move on to the start of the next line, if any */
+    NxtHdr = (unsigned char *)strchr((char *)NxtHdr, '\n');
+    if (NxtHdr != NULL) NxtHdr++;
+  } while ((NxtHdr != NULL) && (*NxtHdr) && (HdrsFound != 3));
+
+
+  /* Done looking through the headers, now check what they say */
+  if ((MimeVer != NULL) && (strcmp((char *)MimeVer, "1.0") == 0)) {
+
+    /* Check Content-Type to see if this is a multipart message */
+    if ((CntType != NULL) &&
+        ((strncasecmp((char *)CntType, "multipart/", 10) == 0) ||
+         (strncasecmp((char *)CntType, "message/", 8) == 0))) {
+
+      char *p1, *p2;
+
+      /*
+       * Search for the "boundary=" parameter. Taking the first '='
+       * would pick up an unrelated parameter such as charset= when
+       * that one comes first.
+       */
+      p1 = FindBoundaryValue((char *)CntType);
+      if (p1 != NULL) {
+        if (*p1 == '\"') {
+          /* The delimiter is inside quotes */
+          p1++;
+          p2 = strchr(p1, '\"');
+          if (p2 != NULL)
+            *p2 = '\0';
+        }
+        else {
+          /* An unquoted delimiter ends at whitespace or at a ';' */
+          for (p2 = p1;
+               (*p2) && !isspace((unsigned char)*p2) && (*p2 != ';');
+               p2++);
+          *p2 = '\0';
+        }
+
+        if (strlen(p1) > 0) {
+          /* The actual delimiter is "--" followed by
+             the boundary string */
+          strcpy((char *)MultipartDelimiter, "--");
+          strncat((char *)MultipartDelimiter, p1, MAX_DELIM_LEN);
+          BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
+        }
+      }
+    }
+
+    /*
+     * Check Content-Transfer-Encoding, but
+     * ONLY for non-multipart messages (BodyType == 0).
+     */
+    if ((XferEnc != NULL) && (BodyType == 0)) {
+      if (strcasecmp((char *)XferEnc, "quoted-printable") == 0) {
+        CurrEncodingIsQP = 1;
+        BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
+        SetEncoding8bit(XferEncOfs);
+      }
+      else if (strcasecmp((char *)XferEnc, "7bit") == 0) {
+        CurrEncodingIsQP = 0;
+        BodyType = (MSG_IS_7BIT);
+      }
+      else if (strcasecmp((char *)XferEnc, "8bit") == 0) {
+        CurrEncodingIsQP = 0;
+        BodyType = (MSG_IS_8BIT);
+      }
+    }
+
+  }
+
+  free(MimeVer);
+  free(XferEnc);
+  free(CntType);
+
+  return BodyType;
+}
+
+
+/*
+ * Decode, in place, one line of quoted-printable data.
+ *
+ * On entry *bufp points to the NUL-terminated line. If
+ * 'collapsedoubledot' is set and the line starts with "..", the first
+ * dot is dropped. "=XX" escapes with two hex digits are replaced by
+ * the byte they encode; an '=' that is not followed by two hex digits
+ * is passed through unchanged. An '=' directly followed by CR, LF or
+ * the end of the string is a soft line-break: it is removed together
+ * with the line ending.
+ *
+ * Return flag set if this line ends with a soft line-break.
+ * 'bufp' is modified to point to the end of the output buffer.
+ */
+static int DoOneQPLine(unsigned char **bufp, int collapsedoubledot)
+{
+  unsigned char *buf = *bufp;
+  unsigned char *p_in, *p_out, *p;
+  size_t n;
+  int ret = 0;
+
+  p_in = buf;
+  if (collapsedoubledot && (strncmp((char *)buf, "..", 2) == 0))
+    p_in++;
+
+  for (p_out = buf; (*p_in); ) {
+    p = (unsigned char *)strchr((char *)p_in, '=');
+    if (p == NULL) {
+      /* No more QP data, just move remainder into place */
+      n = strlen((char *)p_in);
+      memmove(p_out, p_in, n);
+      p_in += n; p_out += n;
+    }
+    else {
+      if (p > p_in) {
+        /* There are some uncoded chars at the beginning. */
+        n = (size_t)(p - p_in);
+        memmove(p_out, p_in, n);
+        p_out += n;
+      }
+
+      switch (*(p+1)) {
+      case '\0': case '\r': case '\n':
+        /* Soft line break, skip '=' */
+        p_in = p+1;
+        if (*p_in == '\r') p_in++;
+        if (*p_in == '\n') p_in++;
+        ret = 1;
+        break;
+
+      default:
+        /*
+         * There may be a QP encoded byte. Both characters must be hex
+         * digits: checking them here (the second one only if the first
+         * is a digit) stops a truncated "=X" at the end of the line
+         * from moving p_in beyond the terminating NUL.
+         */
+        if (isxdigit(*(p+1)) && isxdigit(*(p+2)) &&
+            (qp_char(*(p+1), *(p+2), p_out) == 0)) {
+          p_in = p+3;
+        }
+        else {
+          /* Invalid QP data - pass through unchanged. */
+          *p_out = '=';
+          p_in = p+1;
+        }
+        p_out++;
+        break;
+      }
+    }
+  }
+
+  *p_out = '\0';
+  *bufp = p_out;
+  return ret;
+}
+
+
+/*
+ * Process one line of the message body; to be called once per line.
+ *
+ * Every body line is compared against the multipart delimiter. A
+ * line starting with the delimiter switches to body-part header mode
+ * and turns qp-decoding off. In header mode each line is run through
+ * UnMimeHeader(); a "Content-Transfer-Encoding: quoted-printable"
+ * header turns qp-decoding on and is rewritten to "8bit", and an
+ * empty line switches back to body-data mode.
+ *
+ * Note: Messages that are NOT multipart-messages go through this
+ * routine quickly, since BodyState will always be S_BODY_DATA,
+ * and MultipartDelimiter is empty.
+ *
+ * Return flag set if this line ends with a soft line-break.
+ * 'bufp' is modified to point to the end of the output buffer.
+ */
+
+int UnMimeBodyline(unsigned char **bufp, int collapsedoubledot)
+{
+  unsigned char *line = *bufp;
+  int softbreak = 0;
+
+  if (BodyState == S_BODY_HDR) {
+    /* Headers in body-parts can be encoded, too! */
+    UnMimeHeader(line);
+
+    if (strncasecmp("Content-Transfer-Encoding:", line, 26) == 0) {
+      char *encoding = nxtaddr(line);
+
+      if ((encoding != NULL) && (strcasecmp(encoding, "quoted-printable") == 0)) {
+        CurrEncodingIsQP = 1;
+        SetEncoding8bit(line);
+      }
+    }
+    else if ((*line == '\0') || (*line == '\n') || (strcmp(line, "\r\n") == 0)) {
+      /* Empty line: the body-part headers are finished */
+      BodyState = S_BODY_DATA;
+    }
+
+    *bufp = (line + strlen(line));
+  }
+  else if (BodyState == S_BODY_DATA) {
+    if ((*MultipartDelimiter) &&
+        (strncmp(line, MultipartDelimiter, strlen(MultipartDelimiter)) == 0)) {
+      /* Delimiter line: body-part headers follow from the next line on */
+      BodyState = S_BODY_HDR;
+      CurrEncodingIsQP = 0;
+    }
+
+    if (CurrEncodingIsQP)
+      softbreak = DoOneQPLine(bufp, collapsedoubledot);
+    else
+      *bufp = (line + strlen(line));
+  }
+
+  return softbreak;
+}
+
+
+#ifdef STANDALONE
+#include <stdio.h>
+#include <unistd.h>
+
+/* Program name for the standalone build.
+ * NOTE(review): presumably referenced by shared fetchmail helper code
+ * (e.g. error reporting) - confirm against the callers. */
+char *program_name = "unmime";
+
+/* Initial size of the input buffer in main(), and the step by which it grows */
+#define BUFSIZE_INCREMENT 4096
+
+/* Trace output: an fwrite() call when built with DEBUG, otherwise
+ * the macro expands to nothing. */
+#ifdef DEBUG
+#define DBG_FWRITE(B,L,BS,FD) fwrite(B, L, BS, FD)
+#else
+#define DBG_FWRITE(B,L,BS,FD)
+#endif
+
+/*
+ * Standalone filter: read a mail message from stdin, decode it and
+ * write the result to stdout.
+ *
+ * The header block (everything up to and including the first empty
+ * line) is read into one buffer, passed through UnMimeHeader() and
+ * MimeBodyType(), and written out. The body is then handled one line
+ * at a time; lines go through UnMimeBodyline() when MimeBodyType()
+ * reported MSG_NEEDS_DECODE, and are copied unchanged otherwise.
+ *
+ * When built with DEBUG, the raw input and the converted output are
+ * also written to /tmp/i_unmime.<pid> and /tmp/o_unmime.<pid>.
+ *
+ * Returns 0 on success, 1 if a debug file cannot be opened or if
+ * flushing stdout fails.
+ */
+int main(int argc, char *argv[])
+{
+  size_t BufSize;
+  unsigned char *buffer, *buf_p;
+  int nl_count, bodytype;
+  size_t hdrlen;
+
+#ifdef DEBUG
+  pid_t pid;
+  FILE *fd_orig, *fd_conv;
+  char fnam[100];
+
+  pid = getpid();
+  sprintf(fnam, "/tmp/i_unmime.%lx", (unsigned long)pid);
+  fd_orig = fopen(fnam, "w");
+  sprintf(fnam, "/tmp/o_unmime.%lx", (unsigned long)pid);
+  fd_conv = fopen(fnam, "w");
+  if ((fd_orig == NULL) || (fd_conv == NULL)) {
+    perror("unmime: cannot open debug file");
+    return 1;
+  }
+#endif
+
+  (void)argc;
+  (void)argv;
+
+  BufSize = BUFSIZE_INCREMENT;    /* Initial size of buffer */
+  buf_p = buffer = (unsigned char *) xmalloc(BufSize);
+  nl_count = 0;
+
+  /*
+   * Read the headers, one byte at a time, until two newlines in a row
+   * (ignoring CRs) or end of input. A byte is only looked at and kept
+   * when fread() actually delivered one.
+   */
+  while ((nl_count < 2) && (fread(buf_p, 1, 1, stdin) == 1)) {
+    switch (*buf_p) {
+     case '\n':
+       nl_count++;
+       break;
+
+     case '\r':
+       break;
+
+     default:
+       nl_count = 0;
+       break;
+    }
+
+    buf_p++;
+    if ((size_t)(buf_p - buffer) == BufSize) {
+       /* Buffer is full! Get more room. */
+       buffer = (unsigned char *) xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
+       buf_p = buffer + BufSize;
+       BufSize += BUFSIZE_INCREMENT;
+    }
+  }
+
+  *buf_p = '\0';
+  DBG_FWRITE(buffer, strlen((char *)buffer), 1, fd_orig);
+
+  UnMimeHeader(buffer);
+  bodytype = MimeBodyType(buffer);
+
+  hdrlen = strlen((char *)buffer);
+  fwrite(buffer, hdrlen, 1, stdout);
+  DBG_FWRITE(buffer, hdrlen, 1, fd_conv);
+
+  /* Now the body, line by line, reusing the same buffer */
+  for (;;) {
+     size_t len = 0;
+     int c;
+
+     while ((c = getc(stdin)) != EOF) {
+        if (len + 1 >= BufSize) {
+           /* Keep room for this byte and the terminating NUL */
+           buffer = (unsigned char *) xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
+           BufSize += BUFSIZE_INCREMENT;
+        }
+        buffer[len++] = (unsigned char)c;
+        if (c == '\n')
+           break;
+     }
+
+     /*
+      * Stop only when no more input was read. (A line may well decode
+      * to nothing, e.g. a lone soft line-break, so the length of the
+      * output must not be used to detect the end of the message.)
+      */
+     if (len == 0)
+        break;
+
+     buffer[len] = '\0';
+     DBG_FWRITE(buffer, len, 1, fd_orig);
+
+     buf_p = buffer + len;
+     if (bodytype & MSG_NEEDS_DECODE) {
+        buf_p = buffer;
+        UnMimeBodyline(&buf_p, 0);
+     }
+     fwrite(buffer, (size_t)(buf_p - buffer), 1, stdout);
+     DBG_FWRITE(buffer, (size_t)(buf_p - buffer), 1, fd_conv);
+  }
+
+  free(buffer);
+
+#ifdef DEBUG
+  fclose(fd_orig);
+  fclose(fd_conv);
+#endif
+
+  return (fflush(stdout) == 0) ? 0 : 1;
+}
+#endif
+
author	Eric S. Raymond <esr@thyrsus.com>	1998-03-26 19:48:28 +0000
committer	Eric S. Raymond <esr@thyrsus.com>	1998-03-26 19:48:28 +0000
commit	f27916a7f27f00e1d2325538e55923c505706527 (patch)
tree	22102adfe74bfcf871a1946d87d6e3161bb9ea49
parent	cacba6b362fb7907d1bb2d529e95504b27a00d25 (diff)
download	fetchmail-f27916a7f27f00e1d2325538e55923c505706527.tar.gz fetchmail-f27916a7f27f00e1d2325538e55923c505706527.tar.bz2 fetchmail-f27916a7f27f00e1d2325538e55923c505706527.zip