aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Rodger <paul@paulrodger.com>2002-04-04 02:41:37 +0000
committerPaul Rodger <paul@paulrodger.com>2002-04-04 02:41:37 +0000
commit1ed695b748f0c53dea845231e3fd03ec1186ee3f (patch)
treedcc702ac259dcc7290a83ef3f269e0f2b50b3f7f
parent8bc23385dca01b161b1c03cc162587225f40bd0f (diff)
downloadarchivemail-1ed695b748f0c53dea845231e3fd03ec1186ee3f.tar.gz
archivemail-1ed695b748f0c53dea845231e3fd03ec1186ee3f.tar.bz2
archivemail-1ed695b748f0c53dea845231e3fd03ec1186ee3f.zip
Since we might not have a parse-able 'Delivery-date' or 'Date' field,
use 5 different ways to guess the date of a message.
-rw-r--r--CHANGELOG4
-rw-r--r--TODO5
-rwxr-xr-xarchivemail.py106
3 files changed, 58 insertions, 57 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 2d8d885..1b24531 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,7 @@
+Version 0.2.1 - 3 April 2002
+ * Since we might not have a parse-able 'Delivery-date' or 'Date' field,
+ use 5 different ways to guess the date of a message.
+
Version 0.2.0 - 3 April 2002
* Added support for reading from MH mailboxes
* Refuse to proceed if we would be making tempfiles in world-writable
diff --git a/TODO b/TODO
index fefe4a0..35a9431 100644
--- a/TODO
+++ b/TODO
@@ -1,12 +1,9 @@
-Goals for next minor release (0.2.1):
+Goals for next minor release (0.2.2):
-------------------------------------
* Test exclusive locking works with another test process
* Perserve atime of original mailbox properly
* Finish man page
-* Change archivemail so that if a message has no valid 'Date' or
- 'Delivery-Date' header, don't get upset -- try to make do.
- (Esp. if we are using a maildir or MH folder -- use the file timestamp)
Goals for next major release (0.3.0):
-------------------------------------
diff --git a/archivemail.py b/archivemail.py
index 948c116..b06bed3 100755
--- a/archivemail.py
+++ b/archivemail.py
@@ -43,6 +43,7 @@ import fcntl
import getopt
import mailbox
import os
+import re
import rfc822
import signal
import stat
@@ -148,7 +149,6 @@ class Options:
output_dir = os.curdir
quiet = 0
script_name = os.path.basename(sys.argv[0])
- use_modify_time = 0
verbose = 0
warn_duplicates = 0
@@ -164,11 +164,11 @@ class Options:
"""
try:
- opts, args = getopt.getopt(args, '?IVZd:hmno:qs:vz',
+ opts, args = getopt.getopt(args, '?IVZd:hno:qs:vz',
["bzip2", "compress", "days=", "delete",
"dry-run", "gzip", "help", "output-dir=",
- "quiet", "suffix", "modify-time", "verbose",
- "version", "warn-duplicate"])
+ "quiet", "suffix", "verbose", "version",
+ "warn-duplicate"])
except getopt.error, msg:
user_error(msg)
for o, a in opts:
@@ -187,8 +187,6 @@ class Options:
sys.exit(0)
if o in ('-q', '--quiet'):
self.quiet = 1
- if o in ('-m', '--modify-time'):
- self.use_modify_time = 1
if o in ('-v', '--verbose'):
self.verbose = 1
if o in ('-s', '--suffix'):
@@ -499,7 +497,6 @@ Options are as follows:
-Z, --compress compress the archive(s) using compress
--delete delete rather than archive old mail (use with caution!)
--warn-duplicate warn about duplicate Message-IDs in the same mailbox
- -m, --modify-time use file last-modified time as date for maildir messages
-v, --verbose report lots of extra debugging information
-q, --quiet quiet mode - print no statistics (suitable for crontab)
-V, --version display version information
@@ -583,52 +580,58 @@ def make_mbox_from(message):
address_header = message.get('From')
(name, address) = rfc822.parseaddr(address_header)
- date = None
- delivery_date_header = message.get('Delivery-date')
- if delivery_date_header:
- date = rfc822.parsedate(delivery_date_header)
- if not date:
- date_header = message.get('Date')
- if not date_header:
- unexpected_error("message has no 'Date' header")
- date = rfc822.parsedate(date_header)
- if not date:
- unexpected_error("message has no valid 'Date' header")
- date_string = time.asctime(date)
+ time_message = guess_delivery_time(message)
+ assert(time_message)
+ gm_date = time.gmtime(time_message)
+ assert(gm_date)
+ date_string = time.asctime(gm_date)
+
mbox_from = "From %s %s\n" % (address, date_string)
return mbox_from
-def get_date_mtime(message):
- """Return the delivery date of an rfc822 message in a maildir mailbox"""
- assert(message)
- vprint("using last-modification time of message file")
- return os.path.getmtime(message.fp.name)
-
-
-def get_date_headers(message):
- """Return the delivery date of an rfc822 message in a mbox mailbox"""
+def guess_delivery_time(message):
+ """Return a guess at the delivery date of an rfc822 message"""
assert(message)
- date = message.getdate('Date')
- delivery_date = message.getdate('Delivery-date')
- use_date = None
- time_message = None
- if delivery_date:
- try:
- time_message = time.mktime(delivery_date)
- use_date = delivery_date
- vprint("using message 'Delivery-date' header")
- except ValueError:
- pass
- if date and not use_date:
- try:
- time_message = time.mktime(date)
- use_date = date
- vprint("using message 'Date' header")
- except ValueError:
- pass
- if not use_date:
- unexpected_error("no valid dates found for message")
+ # try to guess the delivery date from various headers
+ # get more desperate as we go through the array
+ for header in ('Delivery-date', 'Date', 'Resent-Date'):
+ date = message.getdate(header)
+ if date:
+ try:
+ time_message = time.mktime(date)
+ assert(time_message, 'time.mktime() returned false')
+ vprint("using valid time found from '%s' header" % header)
+ return time_message
+ except (ValueError, OverflowError): pass
+ # as a second-last resort, try the date from the 'From_' line (ugly)
+ # this will only work from a mbox-format mailbox
+ if (message.unixfrom):
+ header = re.sub("From \S+", "", message.unixfrom)
+ header = string.strip(header)
+ date = rfc822.parsedate(header)
+ if date:
+ try:
+ time_message = time.mktime(date)
+ assert(time_message, 'time.mktime() returned false')
+ vprint("using valid time found from unix 'From_' header")
+ return time_message
+ except (ValueError, OverflowError): pass
+ # the headers have no valid dates -- last resort, try the file timestamp
+ # this will not work for mbox mailboxes
+ try:
+ file_name = message.fp.name
+ except AttributeError:
+ # we are looking at a 'mbox' mailbox - argh!
+ # Just return the current time - this will never get archived :(
+ vprint("no valid times found at all -- using current time!")
+ return time.time()
+ if not os.path.isfile(file_name):
+ unexpected_error("mailbox file name '%s' has gone missing" % \
+ file_name)
+ time_message = os.path.getmtime(message.fp.name)
+ vprint("using valid time found from '%s' last-modification time" % \
+ file_name)
return time_message
@@ -720,7 +723,7 @@ def _archive_mbox(mailbox_name, final_archive_name):
vprint("processing message '%s'" % msg.get('Message-ID'))
if _options.warn_duplicates:
cache.warn_if_dupe(msg)
- time_message = get_date_headers(msg)
+ time_message = guess_delivery_time(msg)
if is_too_old(time_message):
stats.another_archived()
if _options.delete_old_mail:
@@ -794,10 +797,7 @@ def _archive_dir(mailbox_name, final_archive_name, type):
vprint("processing message '%s'" % msg.get('Message-ID'))
if _options.warn_duplicates:
cache.warn_if_dupe(msg)
- if _options.use_modify_time:
- time_message = get_date_mtime(msg)
- else:
- time_message = get_date_headers(msg)
+ time_message = guess_delivery_time(msg)
if is_too_old(time_message):
stats.another_archived()
if _options.delete_old_mail: