From 8e5c8700572d74629eb9edb1090a83c0d316d633 Mon Sep 17 00:00:00 2001 From: Nikolaus Schulz Date: Tue, 13 Nov 2007 21:13:38 +0000 Subject: guess_delivery_time(): * look for the timestamp of the latest 'Received' header before resorting to 'Date' or 'Resent-Date'. * let 'Resent-date' header take precedence over 'Date'. Document these changes in manpage and changelog. Closes: #1481316, #1764855, Debian bug #272666. --- CHANGELOG | 6 ++++++ archivemail.1 | 7 ++++--- archivemail.py | 14 ++++++++++---- archivemail.sgml | 7 ++++--- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 5fbe4d7..adb1a50 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,12 @@ Version 0.7.3 - UNRELEASED * Fixed date header parsing to be precise with timezone information. Also, when writing From_ line timestamps, don't use UTC but local time, without timezone information. + * To determine the delivery date of a message, archivemail now looks for the + timestamp of the latest 'Received' header before resorting to 'Date' or + 'Resent-Date'. This should give much better results when there is no + 'Delivery-date' header. (Thanks Andrew Ferrier & Christian Brabandt) + Closes: #1481316, #1764855, Debian bug #272666. + * If present, the 'Resent-date' header now takes precedence over 'Date'. Version 0.7.2 - 9 November 2007 diff --git a/archivemail.1 b/archivemail.1 index 0c8a07c..66b3b81 100644 --- a/archivemail.1 +++ b/archivemail.1 @@ -3,7 +3,7 @@ .\" .\" Please send any bug reports, improvements, comments, patches, .\" etc. to Steve Cheng . -.TH "ARCHIVEMAIL" "1" "07 November 2007" "SP" "" +.TH "ARCHIVEMAIL" "1" "13 November 2007" "SP" "" .SH NAME archivemail \- archive and compress your old email @@ -260,8 +260,9 @@ links or create tempfiles or archives in world-writable directories. 
\fBarchivemail\fR attempts to find the delivery date of a message by looking for valid dates in the following headers, in order of precedence: \fBDelivery-date\fR, -\fBDate\fR and -\fBResent-Date\fR\&. +\fBReceived\fR, +\fBResent-Date\fR and +\fBDate\fR\&. If it cannot find any valid date in these headers, it will use the last-modified file timestamp on \fBMH\fR and \fBMaildir\fR format mailboxes, or the date on the diff --git a/archivemail.py b/archivemail.py index ae4a7a0..a60834d 100755 --- a/archivemail.py +++ b/archivemail.py @@ -777,14 +777,20 @@ def guess_delivery_time(message): assert(message) # try to guess the delivery date from various headers # get more desparate as we go through the array - for header in ('Delivery-date', 'Date', 'Resent-Date'): - try: - date = message.getdate_tz(header) + for header in 'Delivery-date', 'Received', 'Resent-Date', 'Date': + try: + if header == 'Received': + # This should be good enough for almost all headers in the wild; + # if we're guessing wrong, parsedate_tz() will fail gracefully. + token = message.getrawheader(header).rsplit(';', 1)[-1] + else: + token = message.get(header) + date = rfc822.parsedate_tz(token) if date: time_message = rfc822.mktime_tz(date) vprint("using valid time found from '%s' header" % header) return time_message - except (IndexError, ValueError, OverflowError): pass + except (AttributeError, IndexError, ValueError, OverflowError): pass # as a second-last resort, try the date from the 'From_' line (ugly) # this will only work from a mbox-format mailbox if (message.unixfrom): diff --git a/archivemail.sgml b/archivemail.sgml index 9be6c5d..69188b8 100644 --- a/archivemail.sgml +++ b/archivemail.sgml @@ -35,7 +35,7 @@ -7 November 2007 +13 November 2007 archivemail @@ -404,8 +404,9 @@ links or create tempfiles or archives in world-writable directories.