aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric S. Raymond <esr@thyrsus.com> 2003-10-10 20:41:02 +0000
committer Eric S. Raymond <esr@thyrsus.com> 2003-10-10 20:41:02 +0000
commit 9dd8400af9d8cd28f84eaf19f0a6c4aeb7c84b43 (patch)
tree 7aac956c7685c95a2f6f00b05e5ca81a491afce8
parent 8448bbcc5acf544042af536ffa81a23e65adfadb (diff)
download fetchmail-9dd8400af9d8cd28f84eaf19f0a6c4aeb7c84b43.tar.gz
fetchmail-9dd8400af9d8cd28f84eaf19f0a6c4aeb7c84b43.tar.bz2
fetchmail-9dd8400af9d8cd28f84eaf19f0a6c4aeb7c84b43.zip
Dup killer now uses an MD5 hash of the message headers.
svn path=/trunk/; revision=3853
-rw-r--r-- NEWS 1
-rw-r--r-- fetchmail.h 2
-rw-r--r-- transact.c 49
3 files changed, 28 insertions, 24 deletions
diff --git a/NEWS b/NEWS
index 10ec7159..ddb45980 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,7 @@
* Benjamin Drieu's patch for Debian bug #156592, incorrect handling of host/port
option.
* Smash all NULs out of headers right after the socket read.
+* Dup-killer code now keys on an MD5 hash of the raw headers.
fetchmail-6.2.4 (Wed Aug 13 04:27:35 EDT 2003), 22625 lines:
diff --git a/fetchmail.h b/fetchmail.h
index f13b1205..e8286d48 100644
--- a/fetchmail.h
+++ b/fetchmail.h
@@ -316,7 +316,7 @@ struct query
struct idlist *oldsaved, *newsaved;
struct idlist **oldsavedend;
char *lastid; /* last Message-ID seen on this connection */
- char *thisid; /* Message-ID of current message */
+ char thisid[16]; /* Message fingerprint for dup killing */
/* internal use -- per-message state */
int mimemsg; /* bitmask indicating MIME body-type */
diff --git a/transact.c b/transact.c
index bddce829..b6cdcc8a 100644
--- a/transact.c
+++ b/transact.c
@@ -29,6 +29,7 @@
#ifdef HAVE_NET_SOCKET_H
#include <net/socket.h>
#endif
+#include "md5.h"
#include "i18n.h"
#include "socket.h"
@@ -406,10 +407,8 @@ int readheaders(int sock,
if (delivered_to)
free(delivered_to);
- /* initially, no message ID */
- if (ctl->thisid)
- free(ctl->thisid);
- ctl->thisid = NULL;
+ /* initially, no message digest */
+ memset(ctl->thisid, '\0', sizeof(ctl->thisid));
msgblk.headers = received_for = delivered_to = NULL;
from_offs = reply_to_offs = resent_from_offs = app_from_offs =
@@ -605,10 +604,6 @@ int readheaders(int sock,
if (linelen != strlen (line))
has_nuls = TRUE;
- /* save the message's ID, we may use it for killing duplicates later */
- if (MULTIDROP(ctl) && !strncasecmp(line, "Message-ID:", 11))
- ctl->thisid = xstrdup(line);
-
/*
* The University of Washington IMAP server (the reference
* implementation of IMAP4 written by Mark Crispin) relies
@@ -735,7 +730,7 @@ int readheaders(int sock,
* make sure we never try to rewrite such a blank Return-Path. We
* handle this with a check for <> in the rewrite logic above.
*
- * Also, if an email has multiple Return-Path: statement, we only
+ * Also, if an email has multiple Return-Path: headers, we only
* read the first occurrence, as some spam email has more than one
* Return-Path.
*
@@ -890,6 +885,8 @@ int readheaders(int sock,
if (refuse_mail)
return(PS_REFUSED);
/*
+ * This is the duplicate-message killer code.
+ *
* When mail delivered to a multidrop mailbox on the server is
* addressed to multiple people on the client machine, there will
* be one copy left in the box for each recipient. This is not a
@@ -901,10 +898,10 @@ int readheaders(int sock,
* if the mail is addressed to N people, each recipient will
* get N copies. This is bad when N > 1.
*
- * Foil this by suppressing all but one copy of a message with
- * a given Message-ID. The accept_count test ensures that
- * multiple pieces of email with the same Message-ID, each
- * with a *single* addressee (the N == 1 case), won't be
+ * Foil this by suppressing all but one copy of a message with a
+ * given set of headers. The accept_count test ensures
+ * that multiple pieces of email with identical headers, each
+ * with a *single* addressee (the N == 1 case), won't be
* suppressed.
*
* Note: This implementation only catches runs of successive
@@ -918,20 +915,26 @@ int readheaders(int sock,
* to break it in a way that blackholed mail. Better to pass
* the occasional duplicate than to do that...
*/
- if (!received_for && env_offs == -1 && !delivered_to)
+ if (MULTIDROP(ctl))
{
- if (ctl->lastid && ctl->thisid && !strcasecmp(ctl->lastid, ctl->thisid))
+ MD5_CTX context;
+
+ MD5Init(&context);
+ MD5Update(&context, msgblk.headers, strlen(msgblk.headers));
+ MD5Final(ctl->thisid, &context);
+
+ if (!received_for && env_offs == -1 && !delivered_to)
{
- if (accept_count > 1)
+ /*
+ * Hmmm...can MD5 ever yield all zeroes as a hash value?
+ * If so there is a one in 2^128 chance this
+ * code will incorrectly nuke the first message.
+ */
+ if (memcmp(ctl->lastid, ctl->thisid, sizeof(ctl->lastid)))
+ ctl->lastid = ctl->thisid;
+ else if (accept_count > 1)
return(PS_REFUSED);
}
- else
- {
- if (ctl->lastid)
- free(ctl->lastid);
- ctl->lastid = ctl->thisid;
- ctl->thisid = NULL;
- }
}
/*