aboutsummaryrefslogtreecommitdiffstats
path: root/README
diff options
context:
space:
mode:
authorEric S. Raymond <esr@thyrsus.com>2002-09-17 09:36:00 +0000
committerEric S. Raymond <esr@thyrsus.com>2002-09-17 09:36:00 +0000
commit6ddd8c9505a4620fd4b8f2a16a2de7ae1930897f (patch)
treef0023cf3ace75fa8f7b0304a74d802b04c1f65d0 /README
parent8f4a52422003ec63e851d60fc0d0441a9eca7c58 (diff)
downloadfetchmail-6ddd8c9505a4620fd4b8f2a16a2de7ae1930897f.tar.gz
fetchmail-6ddd8c9505a4620fd4b8f2a16a2de7ae1930897f.tar.bz2
fetchmail-6ddd8c9505a4620fd4b8f2a16a2de7ae1930897f.zip
Fix a minor bug reported by Matthias Andree.
svn path=/trunk/; revision=3722
Diffstat (limited to 'README')
0 files changed, 0 insertions, 0 deletions
n120' href='#n120'>120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 
615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683
/*
 * MIME mail decoding.
 *
 * This module contains decoding routines for converting
 * quoted-printable data into pure 8-bit data, in MIME
 * formatted messages.
 *
 * By Henrik Storner <storner@image.dk>
 *
 * Configuration file support for fetchmail 4.3.8 by 
 * Frank Damgaard <frda@post3.tele.dk>
 * 
 */

#include "config.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include "fetchmail.h"

/*
 * Convert one ASCII hexadecimal digit to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f'.  Any other input yields
 * 16, which lies outside the nibble range, so callers can detect a bad
 * digit by testing for a result greater than 15.
 *
 * The invalid marker must not be the input byte itself: bytes 0x00..0x0F
 * (NUL, TAB, CR, LF, ...) would otherwise be mistaken for valid digits.
 */
static unsigned char unhex(unsigned char c)
{
  if ((c >= '0') && (c <= '9'))
    return (c - '0');
  else if ((c >= 'A') && (c <= 'F'))
    return (c - 'A' + 10);
  else if ((c >= 'a') && (c <= 'f'))
    return (c - 'a' + 10);
  else
    return 16;	/* invalid hex character */
}

/*
 * Combine two hexadecimal digit characters into one byte.
 *
 * c1 is the high-order digit, c2 the low-order digit.  On success the
 * byte is stored through c_out and 0 is returned.  If unhex() reports a
 * value above 15 for either digit, 1 is returned and *c_out is left
 * untouched.
 */
static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out)
{
  const unsigned char hi = unhex(c1);
  const unsigned char lo = unhex(c2);

  if ((hi <= 15) && (lo <= 15)) {
    *c_out = 16*hi+lo;
    return 0;
  }

  return 1;
}



/*
 * Routines to decode MIME QP-encoded headers, as per RFC 2047.
 */

/* States of the decoding state machine */
#define S_COPY_PLAIN        0	/* Just copy, but watch for the QP flag */
#define S_SKIP_MIMEINIT     1	/* Get the encoding, and skip header */
#define S_COPY_MIME         2	/* Decode a sequence of coded characters */

static const char MIMEHDR_INIT[]  = "=?";	/* Start of coded sequence */
static const char MIMEHDR_END[]   = "?=";	/* End of coded sequence */

/*
 * Decode RFC 2047 encoded words ("=?charset?encoding?text?=") found in a
 * header buffer, in place.
 *
 * hdr - NUL-terminated header text; it is overwritten with the decoded
 *       result and NUL-terminated again.
 *
 * Only the transfer encoding is undone ('q' = quoted-printable,
 * 'b' = base64, matched case-insensitively); conversion between
 * character sets is not implemented.  In other words: we assume the
 * charsets used can be displayed by your mail program without problems.
 * The text of an encoded word with any other encoding letter is copied
 * unchanged.
 *
 * Decoding is done "in-situ", i.e. without using an additional buffer
 * for temporary storage.  This is possible since the decoded string will
 * always be shorter than the encoded string, due to the encoding scheme.
 */
void UnMimeHeader(unsigned char *hdr)
{
  int  state = S_COPY_PLAIN;
  unsigned char *p_in, *p_out, *p;
  unsigned char enc = '\0';		/* initialization pacifies -Wall */
  size_t len;

  /* Speed up in case this is not a MIME-encoded header */
  p = (unsigned char *) strstr((char *) hdr, MIMEHDR_INIT);
  if (p == NULL)
    return;   /* No MIME header */

  /* Loop through the buffer.
   *  p_in : Next char to be processed.
   *  p_out: Where to put the next processed char
   *  enc  : Encoding used (usually, 'q' = quoted-printable)
   */
  for (p_out = p_in = hdr; (*p_in); ) {
    switch (state) {
    case S_COPY_PLAIN:
      p = (unsigned char *) strstr((char *) p_in, MIMEHDR_INIT);
      if (p == NULL) {
        /*
         * No more coded data in buffer,
         * just move remainder into place.
         */
        len = strlen((char *) p_in);   /* How much left */
        memmove(p_out, p_in, len);
        p_in += len; p_out += len;
      }
      else {
        /* MIME header init found at location p */
        if (p > p_in) {
          /* There are some uncoded chars at the beginning. */
          len = (size_t) (p - p_in);
          memmove(p_out, p_in, len);
          p_out += len;
        }
        p_in = (p + 2);
        state = S_SKIP_MIMEINIT;
      }
      break;

    case S_SKIP_MIMEINIT:
      /* Mime type definition: "charset?encoding?" */
      p = (unsigned char *) strchr((char *) p_in, '?');

      /*
       * p_in .. (p-1) holds the charset, *(p+1) is the transfer
       * encoding, and *(p+2) must be a '?'.  *(p+1) is checked for the
       * terminator first so that *(p+2) is never read beyond the end
       * of the string.
       */
      if ((p != NULL) && (*(p+1) != '\0') && (*(p+2) == '?')) {
        enc = tolower(*(p+1));
        p_in = p+3;
        state = S_COPY_MIME;
      }
      else
        state = S_COPY_PLAIN;   /* Invalid data */
      break;

    case S_COPY_MIME:
      p = (unsigned char *) strstr((char *) p_in, MIMEHDR_END);  /* Find end of coded data */
      if (p == NULL) p = p_in + strlen((char *) p_in);
      for (; (p_in < p); ) {
        /* Decode all encoded data */
        if (enc == 'q') {
          if (*p_in == '=') {
            /*
             * Decode one char qp-coded at (p_in+1) and (p_in+2), but
             * only if both digits lie inside the encoded text; this
             * keeps p_in from ever stepping past p.
             */
            if (((p - p_in) >= 3) &&
                (qp_char(*(p_in+1), *(p_in+2), p_out) == 0))
              p_in += 3;
            else {
              /* Invalid QP data - pass through unchanged. */
              *p_out = *p_in;
              p_in++;
            }
          }
          else if (*p_in == '_') {
            /*
             * RFC 2047: '_' inside encoded word represents 0x20.
             * NOT a space - always the value 0x20.
             */
            *p_out = 0x20;
            p_in++;
          }
          else {
            /* Copy unchanged */
            *p_out = *p_in;
            p_in++;
          }
          p_out++;
        }
        else if (enc == 'b') {
          /* Decode base64 encoded data */
          unsigned char delimsave;
          int decoded_count;

          /*
           * Temporarily plant a '\r' at the end of the encoded text;
           * presumably from64tobits() stops decoding there - confirm
           * against its definition.
           */
          delimsave = *p; *p = '\r';
          decoded_count = from64tobits(p_out, p_in);
          *p = delimsave;
          if (decoded_count > 0)
            p_out += decoded_count;
          p_in = p;
        }
        else {
          /* Unknown encoding - copy unchanged */
          *p_out = *p_in;
          p_in++;
          p_out++;
        }
      }
      if (*p_in)
        p_in += 2;   /* Skip the MIMEHDR_END delimiter */

      /*
       * We've completed decoding one encoded sequence. But another
       * may follow immediately, in which case whitespace before the
       * new MIMEHDR_INIT delimiter must be discarded.
       * See if that is the case
       */
      p = (unsigned char *) strstr((char *) p_in, MIMEHDR_INIT);
      state = S_COPY_PLAIN;
      if (p != NULL) {
        /*
         * There is more MIME data later on. Is there
         * whitespace only before the delimiter?
         */
        unsigned char *q;
        int  wsp_only = 1;

        for (q=p_in; (wsp_only && (q < p)); q++)
          wsp_only = isspace(*q);

        if (wsp_only) {
          /*
           * Whitespace-only before the MIME delimiter. OK,
           * just advance p_in to past the new MIMEHDR_INIT,
           * and prepare to process the new MIME charset/encoding
           * header.
           */
          p_in = p + strlen(MIMEHDR_INIT);
          state = S_SKIP_MIMEINIT;
        }
      }
      break;
    }
  }

  *p_out = '\0';
}



/*
 * Routines for decoding body-parts of a message.
 *
 * Since the "fetch" part of fetchmail gets a message body
 * one line at a time, we need to maintain some state variables
 * across multiple invocations of the UnMimeBodyline() routine.
 * The driver routine should call MimeBodyType() when all
 * headers have been received, and then UnMimeBodyline() for
 * every line in the message body.
 *
 */
#define S_BODY_DATA 0	/* Current line is body (or body-part) content */
#define S_BODY_HDR  1	/* Current line is a header of a body-part */

/* 
 * Flag indicating if we are currently processing 
 * the headers or the body of a (multipart) message.
 * MimeBodyType() resets it to S_BODY_DATA; UnMimeBodyline() switches
 * to S_BODY_HDR when it sees the multipart delimiter and back to
 * S_BODY_DATA at the empty line ending the body-part headers.
 */
static int  BodyState = S_BODY_DATA;

/* 
 * Flag indicating if we are in the process of decoding
 * a quoted-printable body part.
 */
static int  CurrEncodingIsQP = 0;

/* 
 * Delimiter for multipart messages. RFC 2046 states that this must
 * NEVER be longer than 70 characters. Add 3 for the two hyphens
 * at the beginning, and a terminating null.
 * An empty string means that no delimiter is in effect.
 */
#define MAX_DELIM_LEN 70
static unsigned char MultipartDelimiter[MAX_DELIM_LEN+3];


/*
 * Replacement text for a "Content-Transfer-Encoding: quoted-printable"
 * header, used for top-level headers and body-part headers alike.  It is
 * written over the original header in place, so it must not be longer
 * than the text it replaces.
 */
static const char ENC8BIT[] = "Content-Transfer-Encoding: 8bit";

/*
 * Overwrite the header starting at XferEncOfs with ENC8BIT and blank out
 * whatever is left of the old header text: every following byte is turned
 * into a space until a byte below ' ' (such as CR, LF or NUL) is reached.
 * A NULL XferEncOfs is ignored.
 */
static void SetEncoding8bit(unsigned char *XferEncOfs)
{
  const size_t replacement_len = strlen(ENC8BIT);
  unsigned char *cursor;

  if (XferEncOfs == NULL)
    return;

  memcpy(XferEncOfs, ENC8BIT, replacement_len);

  /* Pad the remainder of this header with whitespace */
  cursor = XferEncOfs + replacement_len;
  while (*cursor >= ' ') {
    *cursor = ' ';
    cursor++;
  }
}

/*
 * Extract the value of the "boundary" parameter from a Content-Type
 * header value.
 *
 * CntType - NUL-terminated Content-Type value; it is modified in place
 *           (a NUL is written at the end of the boundary string).
 *
 * Returns a pointer into CntType at the start of the boundary string, or
 * NULL if there is no "boundary" parameter, no '=' follows it, a quoted
 * value lacks its closing quote, or the boundary is empty.
 *
 * A quoted boundary is returned exactly as written between the quotes.
 * An unquoted boundary ends at the next ';' (or the end of the string)
 * and has trailing whitespace removed, so that a CR left over from the
 * header line does not become part of the delimiter.
 */
static char *GetBoundary(char *CntType)
{
  char *p1, *p2;
  int flag;

  /* Find the "boundary" delimiter. It must be preceded with a ';'
   * and optionally some whitespace.
   */
  p1 = CntType;
  do {
    p2 = strchr(p1, ';');
    if (p2)
      /* Cast: <ctype.h> functions are undefined for negative char values */
      for (p2++; isspace((unsigned char) *p2); p2++);

    p1 = p2;
  } while ((p1) && (strncasecmp(p1, "boundary", 8) != 0));

  if (p1 == NULL)
    /* No boundary delimiter */
    return NULL;

  /* Skip "boundary", whitespace and '='; check that we do have a '=' */
  for (p1+=8, flag=0; (isspace((unsigned char) *p1) || (*p1 == '=')); p1++)
    flag |= (*p1 == '=');
  if (!flag)
    return NULL;

  /* Find end of boundary delimiter string and zero-terminate it */
  if (*p1 == '\"') {
    /* The delimiter is inside quotes */
    p1++;
    p2 = strchr(p1, '\"');
    if (p2 == NULL)
      return NULL;  /* No closing '"' !?! */
    *p2 = '\0';
  }
  else {
    /* There might be more text after the "boundary" string. */
    p2 = strchr(p1, ';');  /* Safe - delimiter with ';' must be in quotes */
    if (p2 == NULL)
      p2 = p1 + strlen(p1);

    /* Drop trailing whitespace (e.g. the CR of a CRLF line ending) */
    while ((p2 > p1) && isspace((unsigned char) p2[-1]))
      p2--;
    *p2 = '\0';
  }

  return (*p1 != '\0') ? p1 : NULL;
}


/*
 * Inspect the message headers and prime the body-decoding state.
 *
 * This routine does three things:
 * 1) It determines - based on the message headers - whether the
 *    message body is a MIME message that may hold 8 bit data.
 *    - A message that has a "quoted-printable" or "8bit" transfer 
 *      encoding is assumed to contain 8-bit data (when decoded).
 *    - A multipart message is assumed to contain 8-bit data
 *      when decoded (there might be quoted-printable body-parts).
 *    - All other messages are assumed NOT to include 8-bit data.
 * 2) It determines the delimiter-string used in multi-part message
 *    bodies.
 * 3) It sets the initial values of the CurrEncodingIsQP and BodyState
 *    variables, from the header contents.
 *
 * hdrs       - NUL-terminated header text.  If NULL, the state is
 *              reset and 0 is returned.
 * WantDecode - if non-zero and the transfer encoding is
 *              "quoted-printable", the Content-Transfer-Encoding header
 *              inside hdrs is rewritten in place by SetEncoding8bit().
 *
 * The return value is a bitmask made of MSG_IS_7BIT, MSG_IS_8BIT and
 * MSG_NEEDS_DECODE; it is 0 unless a "MIME-Version: 1.0" header was
 * found and one of the cases above applied.
 */
int MimeBodyType(unsigned char *hdrs, int WantDecode)
{
  unsigned char *NxtHdr = hdrs;
  unsigned char *XferEnc, *XferEncOfs, *CntType, *MimeVer, *p;
  int  HdrsFound = 0;     /* We only look for three headers */
  int  BodyType;          /* Return value */ 

  /* Setup for a standard (no MIME, no QP, 7-bit US-ASCII) message */
  MultipartDelimiter[0] = '\0';
  CurrEncodingIsQP = 0;
  BodyState = S_BODY_DATA;
  BodyType = 0;

  /* Just in case ... */
  if (hdrs == NULL)
    return BodyType;

  XferEnc = XferEncOfs = CntType = MimeVer = NULL;

  /*
   * Walk the headers one line at a time, copying the values of the
   * three headers of interest.
   * NOTE(review): HdrsFound counts matches, not distinct headers, so a
   * repeated header can end the scan before all three kinds are seen.
   */
  do {
    if (strncasecmp("Content-Transfer-Encoding:", NxtHdr, 26) == 0) {
      /* Remember where the header starts, for SetEncoding8bit() below */
      XferEncOfs = NxtHdr;
      /* presumably nxtaddr() yields the header's value - confirm */
      p = nxtaddr(NxtHdr);
      if (p != NULL) {
	/* xalloca is a project macro; presumably alloca-style - confirm */
	xalloca(XferEnc, char *, strlen(p) + 1);
	strcpy(XferEnc, p);
	HdrsFound++;
      }
    }
    else if (strncasecmp("Content-Type:", NxtHdr, 13) == 0) {
      /*
       * This one is difficult. We cannot use the standard
       * nxtaddr() routine, since the boundary-delimiter is
       * (probably) enclosed in quotes - and thus appears
       * as an rfc822 comment, and nxtaddr() "eats" up any
       * spaces in the delimiter. So, we have to do this
       * by hand.
       */

      /* Skip the "Content-Type:" part and whitespace after it */
      for (NxtHdr += 13; ((*NxtHdr == ' ') || (*NxtHdr == '\t')); NxtHdr++);

      /* 
       * Get the full value of the Content-Type header;
       * it might span multiple lines. So search for
       * a newline char, but ignore those that have a
       * TAB or space just after the NL (continued
       * lines).
       */
      p = NxtHdr-1;
      do {
        p=strchr((p+1),'\n'); 
      } while ( (p != NULL) && ((*(p+1) == '\t') || (*(p+1) == ' ')) );
      if (p == NULL) p = NxtHdr + strlen(NxtHdr);

      /* Copy the value, up to but not including the final newline */
      xalloca(CntType, char *, p-NxtHdr+2);
      strncpy(CntType, NxtHdr, (p-NxtHdr));
      *(CntType+(p-NxtHdr)) = '\0';
      HdrsFound++;
    }
    else if (strncasecmp("MIME-Version:", NxtHdr, 13) == 0) {
      p = nxtaddr(NxtHdr);
      if (p != NULL) {
	xalloca(MimeVer, char *, strlen(p) + 1);
	strcpy(MimeVer, p);
	HdrsFound++;
      }
    }

    /* Advance to the character following the next newline, if any */
    NxtHdr = (strchr(NxtHdr, '\n'));
    if (NxtHdr != NULL) NxtHdr++;
  } while ((NxtHdr != NULL) && (*NxtHdr) && (HdrsFound != 3));


  /* Done looking through the headers, now check what they say */
  if ((MimeVer != NULL) && (strcmp(MimeVer, "1.0") == 0)) {

    /* Check Content-Type to see if this is a multipart message */
    if ( (CntType != NULL) &&
         ((strncasecmp(CntType, "multipart/", 10) == 0) ||
	  (strncasecmp(CntType, "message/", 8) == 0)) ) {

      char *p1 = GetBoundary(CntType);

      if (p1 != NULL) {
	/* The actual delimiter is "--" followed by 
	   the boundary string */
	strcpy(MultipartDelimiter, "--");
	/* At most MAX_DELIM_LEN chars are appended, which together with
	   "--" and the NUL exactly fills MultipartDelimiter */
	strncat(MultipartDelimiter, p1, MAX_DELIM_LEN);
	BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
      }
    }

    /* 
     * Check Content-Transfer-Encoding, but
     * ONLY for non-multipart messages (BodyType == 0).
     */
    if ((XferEnc != NULL) && (BodyType == 0)) {
      if (strcasecmp(XferEnc, "quoted-printable") == 0) {
	CurrEncodingIsQP = 1;
	BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
	if (WantDecode) {
           SetEncoding8bit(XferEncOfs);
        }
      }
      else if (strcasecmp(XferEnc, "7bit") == 0) {
	CurrEncodingIsQP = 0;
	BodyType = (MSG_IS_7BIT);
      }
      else if (strcasecmp(XferEnc, "8bit") == 0) {
	CurrEncodingIsQP = 0;
	BodyType = (MSG_IS_8BIT);
      }
    }

  }

  return BodyType;
}


/*
 * Decode one line of quoted-printable data, in place.
 *
 * bufp              - in: *bufp points at the NUL-terminated line;
 *                     out: *bufp points at the NUL that terminates the
 *                     decoded output (i.e. the end of the output).
 * collapsedoubledot - if non-zero and the line starts with "..", the
 *                     first dot is dropped.
 *
 * Returns 1 if a soft line-break ('=' directly followed by CR, LF or the
 * end of the line) was found, 0 otherwise.
 *
 * An '=' that is not followed by two valid hexadecimal digits is copied
 * to the output unchanged.
 */
static int DoOneQPLine(unsigned char **bufp, int collapsedoubledot)
{
  unsigned char *buf = *bufp;
  unsigned char *p_in, *p_out, *p;
  size_t n;
  int ret = 0;

  p_in = buf;
  if (collapsedoubledot && (strncmp((const char *) buf, "..", 2) == 0))
    p_in++;

  for (p_out = buf; (*p_in); ) {
    p = (unsigned char *) strchr((const char *) p_in, '=');
    if (p == NULL) {
      /* No more QP data, just move remainder into place */
      n = strlen((const char *) p_in);
      memmove(p_out, p_in, n);
      p_in += n; p_out += n;
    }
    else {
      if (p > p_in) {
        /* There are some uncoded chars at the beginning. */
        n = (size_t) (p - p_in);
        memmove(p_out, p_in, n);
        p_out += n;
      }

      switch (*(p+1)) {
      case '\0': case '\r': case '\n':
        /* Soft line break, skip '=' */
        p_in = p+1;
        if (*p_in == '\r') p_in++;
        if (*p_in == '\n') p_in++;
        ret = 1;
        break;

      default:
        /*
         * There may be a QP encoded byte.  *(p+1) is known to be
         * non-NUL here; *(p+2) must be non-NUL as well, otherwise
         * advancing by three would step over the string terminator.
         */
        if ((*(p+2) != '\0') && (qp_char(*(p+1), *(p+2), p_out) == 0)) {
          p_in = p+3;
        }
        else {
          /* Invalid QP data - pass through unchanged. */
          *p_out = '=';
          p_in = p+1;
        }
        p_out++;
        break;
      }
    }
  }

  *p_out = '\0';
  *bufp = p_out;
  return ret;
}


/*
 * Process one line of the message body; call once per body line.
 *
 * Every line is checked against the multipart delimiter, and the headers
 * of body-parts are examined, since they can switch quoted-printable
 * decoding on and off.
 *
 * For messages that are not multipart this is cheap: BodyState stays
 * S_BODY_DATA and MultipartDelimiter is an empty string, so the line is
 * either QP-decoded or left alone.
 *
 * bufp              - in: *bufp points at the NUL-terminated line;
 *                     out: *bufp points at the end of the (possibly
 *                     decoded) line.
 * collapsedoubledot - passed on to DoOneQPLine().
 *
 * Returns the soft line-break flag reported by DoOneQPLine(), or 0 when
 * the line was not QP-decoded.
 */

int UnMimeBodyline(unsigned char **bufp, int collapsedoubledot)
{
  unsigned char *line = *bufp;
  int soft_break = 0;

  if (BodyState == S_BODY_HDR) {
    /* Headers in body-parts can be encoded, too! */
    UnMimeHeader(line);

    if (strncasecmp("Content-Transfer-Encoding:", line, 26) == 0) {
      char *encoding = nxtaddr(line);

      if ((encoding != NULL) && (strcasecmp(encoding, "quoted-printable") == 0)) {
        CurrEncodingIsQP = 1;
        SetEncoding8bit(line);
      }
    }
    else if ((*line == '\0') || (*line == '\n') || (strcmp(line, "\r\n") == 0)) {
      /* An empty line ends the body-part headers */
      BodyState = S_BODY_DATA;
    }

    *bufp = (line + strlen(line));
  }
  else if (BodyState == S_BODY_DATA) {
    if ((*MultipartDelimiter) &&
        (strncmp(line, MultipartDelimiter, strlen(MultipartDelimiter)) == 0)) {
      /* Delimiter line: the headers of a new body-part follow */
      BodyState = S_BODY_HDR;
      CurrEncodingIsQP = 0;
    }

    if (CurrEncodingIsQP)
      soft_break = DoOneQPLine(bufp, collapsedoubledot);
    else
      *bufp = (line + strlen(line));
  }

  return soft_break;
}


#ifdef STANDALONE
#include <stdio.h>
#include <unistd.h>

char *program_name = "unmime";

#define BUFSIZE_INCREMENT 4096

/* In DEBUG builds, copies of the input and output are written to files */
#ifdef DEBUG
#define DBG_FWRITE(B,L,BS,FD) fwrite(B, L, BS, FD)
#else
#define DBG_FWRITE(B,L,BS,FD)
#endif

/*
 * Stand-alone filter: read a message from stdin, decode its headers and
 * (if MimeBodyType() says so) its body, and write the result to stdout.
 *
 * The header block is everything up to and including the first empty
 * line (two consecutive newlines, CRs ignored); the body is then
 * processed one line at a time.  The command line arguments are unused.
 */
int main(int argc, char *argv[])
{
  unsigned int BufSize;
  unsigned char *buffer, *buf_p;
  int nl_count, i, bodytype;
  int got_input;

#ifdef DEBUG
  pid_t pid;
  FILE *fd_orig, *fd_conv;
  char fnam[100];

  pid = getpid();
  sprintf(fnam, "/tmp/i_unmime.%x", (unsigned int) pid);
  fd_orig = fopen(fnam, "w");
  if (fd_orig == NULL) {
    perror(fnam);
    return 1;
  }
  sprintf(fnam, "/tmp/o_unmime.%x", (unsigned int) pid);
  fd_conv = fopen(fnam, "w");
  if (fd_conv == NULL) {
    perror(fnam);
    fclose(fd_orig);
    return 1;
  }
#endif

  (void) argc;
  (void) argv;

  BufSize = BUFSIZE_INCREMENT;    /* Initial size of buffer */
  buf_p = buffer = (unsigned char *) xmalloc(BufSize);
  nl_count = 0;

  /* Read the headers, one byte at a time, up to the first empty line */
  do {
    i = fread(buf_p, 1, 1, stdin);
    if (i <= 0)
      break;    /* EOF or error: nothing was stored at *buf_p */

    switch (*buf_p) {
     case '\n':
       nl_count++;
       break;

     case '\r':
       break;

     default:
       nl_count = 0;
       break;
    }

    buf_p++;
    if ((unsigned int) (buf_p - buffer) == BufSize) {
       /* Buffer is full! Get more room. */
       buffer = xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
       buf_p = buffer + BufSize;
       BufSize += BUFSIZE_INCREMENT;
    }
  } while (nl_count < 2);

  *buf_p = '\0';
  DBG_FWRITE(buffer, strlen((char *) buffer), 1, fd_orig);

  UnMimeHeader(buffer);
  bodytype = MimeBodyType(buffer, 1);

  i = strlen((char *) buffer);
  fwrite(buffer, i, 1, stdout);
  DBG_FWRITE(buffer, i, 1, fd_conv);

  /* Process the body, one line at a time, until no more input arrives */
  do {
     buf_p = buffer;
     for (;;) {
        unsigned int used = (unsigned int) (buf_p - buffer);

        /* Keep room for the next byte plus the terminating NUL */
        if (used + 2 > BufSize) {
           buffer = xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
           BufSize += BUFSIZE_INCREMENT;
           buf_p = buffer + used;
        }

        i = fread(buf_p, 1, 1, stdin);
        if (i != 1)
           break;               /* EOF or error */
        if (*buf_p++ == '\n')
           break;               /* End of line; newline is kept */
     }
     *buf_p = '\0';
     DBG_FWRITE(buffer, (buf_p - buffer), 1, fd_orig);

     /*
      * Decide on continuing from what was READ, not from what is left
      * after decoding: a line may legitimately decode to nothing (e.g.
      * a lone soft line-break).
      */
     got_input = (buf_p > buffer);
     if (got_input) {
        if (bodytype & MSG_NEEDS_DECODE) {
           buf_p = buffer;
           UnMimeBodyline(&buf_p, 0);
        }
        fwrite(buffer, (buf_p - buffer), 1, stdout);
        DBG_FWRITE(buffer, (buf_p - buffer), 1, fd_conv);
     }
  } while (got_input);

  free(buffer);
  fflush(stdout);

#ifdef DEBUG
  fclose(fd_orig);
  fclose(fd_conv);
#endif

  return 0;
}
#endif