aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Rodger <paul@paulrodger.com>2002-03-31 01:31:11 +0000
committerPaul Rodger <paul@paulrodger.com>2002-03-31 01:31:11 +0000
commit1b7ab09f24f3d70b1d89291c64f13812b5557e92 (patch)
tree70c004eaed9b5ef29e6e940f827ce3cb9fbebb61
parent902a81b4bcf1b133b434322370996c2cd30e0f26 (diff)
downloadarchivemail-1b7ab09f24f3d70b1d89291c64f13812b5557e92.tar.gz
archivemail-1b7ab09f24f3d70b1d89291c64f13812b5557e92.tar.bz2
archivemail-1b7ab09f24f3d70b1d89291c64f13812b5557e92.zip
Added maildir support, cache message-Ids, and lots of other stuff.
-rw-r--r--TODO10
-rwxr-xr-xarchivemail.py797
2 files changed, 534 insertions, 273 deletions
diff --git a/TODO b/TODO
index 372d861..61c6cd5 100644
--- a/TODO
+++ b/TODO
@@ -1,16 +1,16 @@
-add Maildir support
+test exclusive locking works with another test process
-add MH support
-
-start using private variables?
+add MH mailbox support
finish man page
add option to archive depending on mailbox size threshold
+ is this a good idea?
+add option to archive depending on number of messages
+ + is this a good idea?
-perserve atime of mailbox properly
+perserve atime of original mailbox properly
lock any original .gz files (?)
diff --git a/archivemail.py b/archivemail.py
index bae40a6..25354bc 100755
--- a/archivemail.py
+++ b/archivemail.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python -tt
+#! /usr/bin/env python
############################################################################
# Copyright (C) 2002 Paul Rodger <paul@paulrodger.com>
#
@@ -17,114 +17,170 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
############################################################################
-"""Archive and compress old mail in mbox-format mailboxes"""
+"""
+Archive and compress old mail in mbox or maildir-format mailboxes.
+Website: http://archivemail.sourceforge.net/
+"""
import atexit
import fcntl
import getopt
import mailbox
import os
-import re
import rfc822
+import signal
import string
import sys
import tempfile
import time
-# globals
-VERSION = "archivemail v0.1.0"
-COPYRIGHT = """Copyright (C) 2002 Paul Rodger <paul@paulrodger.com>
+# global administrivia
+__version__ = "archivemail v0.10"
+__rcs_id__ = "$Id$"
+__copyright__ = """Copyright (C) 2002 Paul Rodger <paul@paulrodger.com>
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."""
-options = None # global instance of the run-time options class
-stale = None # list of files to delete on abnormal exit
+_stale = None # list of files to delete on abnormal exit
############## class definitions ###############
class Stats:
- """collect and print statistics per mailbox"""
- archived = 0
- mailbox_name = None
- archive_name = None
- start_time = 0
- total = 0
+ """Class to collect and print statistics about mailbox archival"""
+ __archived = 0
+ __mailbox_name = None
+ __archive_name = None
+ __start_time = 0
+ __total = 0
def __init__(self, mailbox_name, final_archive_name):
- """constructor for a new set of statistics - the mailbox names are
- only used for printing a friendly message"""
- self.start_time = time.time()
- self.mailbox_name = mailbox_name
- self.archive_name = final_archive_name + options.compressor_extension
+ """Constructor for a new set of statistics.
+
+ Arguments:
+ mailbox_name -- filename/dirname of the original mailbox
+ final_archive_name -- filename for the final 'mbox' archive, without
+ compression extension (eg .gz)
+
+ """
+ assert(mailbox_name)
+ assert(final_archive_name)
+ self.__start_time = time.time()
+ self.__mailbox_name = mailbox_name
+ self.__archive_name = final_archive_name + _options.compressor_extension
def another_message(self):
- self.total = self.total + 1
+ """Add one to the internal count of total messages processed"""
+ self.__total = self.__total + 1
def another_archived(self):
- self.archived = self.archived + 1
+ """Add one to the internal count of messages archived"""
+ self.__archived = self.__archived + 1
def display(self):
- """Display one line of archive statistics for the mailbox"""
+ """Print statistics about how many messages were archived"""
end_time = time.time()
- time_seconds = end_time - self.start_time
+ time_seconds = end_time - self.__start_time
action = "archived"
- if options.delete_old_mail:
+ if _options.delete_old_mail:
action = "deleted"
+ if _options.dry_run:
+ action = "I would have " + action
print "%s: %s %d of %d message(s) in %.1f seconds" % \
- (self.mailbox_name, action, self.archived, self.total,
+ (self.__mailbox_name, action, self.__archived, self.__total,
time_seconds)
class StaleFiles:
- """container for remembering stale files to delete on abnormal exit"""
+ """Class to keep track of files to be deleted on abnormal exit"""
archive = None # tempfile for messages to be archived
compressed_archive = None # compressed version of the above
procmail_lock = None # original_mailbox.lock
retain = None # tempfile for messages to be retained
+ def clean(self):
+ """Delete any temporary files or lockfiles that exist"""
+ if self.procmail_lock:
+ vprint("removing stale procmail lock '%s'" % self.procmail_lock)
+ try: os.unlink(self.procmail_lock)
+ except (IOError, OSError): pass
+ if self.retain:
+ vprint("removing stale retain file '%s'" % self.retain)
+ try: os.unlink(self.retain)
+ except (IOError, OSError): pass
+ if self.archive:
+ vprint("removing stale archive file '%s'" % self.archive)
+ try: os.unlink(self.archive)
+ except (IOError, OSError): pass
+ if self.compressed_archive:
+ vprint("removing stale compressed archive file '%s'" %
+ self.compressed_archive)
+ try: os.unlink(self.compressed_archive)
+ except (IOError, OSError): pass
+
class Options:
- """container for storing and setting our runtime options"""
+ """Class to store runtime options, including defaults"""
archive_suffix = "_archive"
+ warn_duplicates = 1
compressor = None
compressor_extension = None
days_old_max = 180
delete_old_mail = 0
- lockfile_attempts = 5 # 5 seconds of waiting
+ dry_run = 0
+ lockfile_attempts = 5
lockfile_extension = ".lock"
+ lockfile_sleep = 1
+ output_dir = None
quiet = 0
script_name = os.path.basename(sys.argv[0])
+ use_modify_time = 0
verbose = 0
def parse_args(self, args, usage):
- """set our runtime options from the command-line arguments"""
+ """Set our runtime options from the command-line arguments.
+
+ Arguments:
+ args -- this is sys.argv[1:]
+ usage -- a usage message to display on '--help' or bad arguments
+
+ Returns the remaining command-line arguments that have not yet been
+ parsed as a string.
+
+ """
try:
- opts, args = getopt.getopt(args, '?IVZd:hqs:vz',
- ["bzip2", "compress", "days=", "delete", "gzip",
- "help", "quiet", "suffix", "verbose",
- "version"])
+ opts, args = getopt.getopt(args, '?IVZd:hmno:qs:vz',
+ ["bzip2", "compress", "days=", "delete",
+ "dry-run", "gzip", "help", "output-dir=",
+ "quiet", "suffix", "modify-time", "verbose",
+ "version"])
except getopt.error, msg:
user_error(msg)
for o, a in opts:
if o == '--delete':
self.delete_old_mail = 1
+ if o in ('-n', '--dry-run'):
+ self.dry_run = 1
if o in ('-d', '--days'):
self.days_old_max = string.atoi(a)
if (self.days_old_max < 1):
user_error("argument to -d must be greater than zero")
if (self.days_old_max >= 10000):
user_error("argument to -d must be less than 10000")
+ if o in ('-o', '--output-dir'):
+ self.output_dir = a
if o in ('-h', '-?', '--help'):
print usage
sys.exit(0)
if o in ('-q', '--quiet'):
self.quiet = 1
+ if o in ('-m', '--modify-time'):
+ self.use_modify_time = 1
if o in ('-v', '--verbose'):
self.verbose = 1
if o in ('-s', '--suffix'):
self.archive_suffix = a
if o in ('-V', '--version'):
- print VERSION + "\n\n" + COPYRIGHT
+ print __version__ + "\n\n" + __copyright__
sys.exit(0)
if o in ('-z', '--gzip'):
if (self.compressor):
@@ -149,149 +205,191 @@ class Options:
return args
-class Mailbox:
- """ generic read/writable 'mbox' format mailbox file"""
- count = 0
- file = None
- mbox = None
+class Mbox(mailbox.PortableUnixMailbox):
+ """Class that allows read/write access to a 'mbox' mailbox.
+ Subclasses the mailbox.PortableUnixMailbox class.
+ """
+
+ mbox_file = None # file handle for the mbox file
+
+ def __init__(self, path_name):
+ """Constructor for opening an existing 'mbox' mailbox.
+ Extends constructor for mailbox.PortableUnixMailbox()
- def __init__(self):
- """constructor: doesn't do much"""
- pass
+ Arguments:
+ path_name -- file name of the 'mbox' file to be opened
- def store(self, msg):
- """write one message to the mbox file"""
- vprint("saving message to file '%s'" % self.file.name)
- assert(msg.unixfrom)
- self.file.write(msg.unixfrom)
+ """
+ assert(path_name)
+ try:
+ self.mbox_file = open(path_name, "r")
+ except IOError, msg:
+ unexpected_error(msg)
+ mailbox.PortableUnixMailbox.__init__(self, self.mbox_file)
+
+ def write(self, msg):
+ """Write a rfc822 message object to the 'mbox' mailbox.
+ If the rfc822 has no Unix 'From_' line, then one is constructed
+ from other headers in the message.
+
+ Arguments:
+ msg -- rfc822 message object to be written
+
+ """
+ assert(msg)
+ vprint("saving message to file '%s'" % self.mbox_file.name)
+ unix_from = msg.unixfrom
+ if not unix_from:
+ unix_from = make_mbox_from(msg)
+ self.mbox_file.write(unix_from)
assert(msg.headers)
- self.file.writelines(msg.headers)
- self.file.write("\n")
+ self.mbox_file.writelines(msg.headers)
+ self.mbox_file.write(os.linesep)
# The following while loop is about twice as fast in
- # practice to 'self.file.writelines(msg.fp.readlines())'
+ # practice to 'self.mbox_file.writelines(msg.fp.readlines())'
while 1:
body = msg.fp.read(8192)
if not body:
break
- self.file.write(body)
- self.count = self.count + 1
+ self.mbox_file.write(body)
def unlink(self):
- """destroy the whole thing"""
- if self.file:
- file_name = self.file.name
- self.close()
- vprint("unlinking file '%s'" % self.file.name)
- os.unlink(file_name)
-
- def get_size(self):
- """determine file size of this mbox file"""
- assert(self.file.name)
- return os.path.getsize(self.file.name)
+ """Close and delete the 'mbox' mailbox file"""
+ file_name = self.mbox_file.name
+ self.close()
+ vprint("unlinking file '%s'" % self.mbox_file.name)
+ os.unlink(file_name)
+
+ def is_empty(self):
+ """Return true if the 'mbox' file is empty, false otherwise"""
+ return (os.path.getsize(self.mbox_file.name) == 0)
def close(self):
- """close the mbox file"""
- if not self.file.closed:
- vprint("closing file '%s'" % self.file.name)
- self.file.close()
-
- def read_message(self):
- """read one rfc822 message object from the mbox file"""
- if not self.mbox:
- self.file.seek(0)
- self.mbox = mailbox.UnixMailbox(self.file)
- assert(self.mbox)
- message = self.mbox.next()
- return message
+ """Close the mbox file"""
+ if not self.mbox_file.closed:
+ vprint("closing file '%s'" % self.mbox_file.name)
+ self.mbox_file.close()
def exclusive_lock(self):
- """set an advisory lock on the whole mbox file"""
- vprint("obtaining exclusive lock on file '%s'" % self.file.name)
- fcntl.flock(self.file, fcntl.LOCK_EX)
+ """Set an advisory lock on the 'mbox' mailbox"""
+ vprint("obtaining exclusive lock on file '%s'" % self.mbox_file.name)
+ fcntl.flock(self.mbox_file, fcntl.LOCK_EX)
def exclusive_unlock(self):
- """unset any advisory lock on the mbox file"""
- vprint("dropping exclusive lock on file '%s'" % self.file.name)
- fcntl.flock(self.file, fcntl.LOCK_UN)
+ """Unset any advisory lock on the 'mbox' mailbox"""
+ vprint("dropping exclusive lock on file '%s'" % self.mbox_file.name)
+ fcntl.flock(self.mbox_file, fcntl.LOCK_UN)
def procmail_lock(self):
- """create a procmail-style .lock file to prevent clashes"""
- lock_name = self.file.name + options.lockfile_extension
+ """Create a procmail lockfile on the 'mbox' mailbox"""
+ lock_name = self.mbox_file.name + _options.lockfile_extension
attempt = 0
while os.path.isfile(lock_name):
vprint("lockfile '%s' exists - sleeping..." % lock_name)
- time.sleep(1)
+ time.sleep(_options.lockfile_sleep)
attempt = attempt + 1
- if (attempt >= options.lockfile_attempts):
- user_error("Giving up waiting for procmail lock '%s'" % lock_name)
+ if (attempt >= _options.lockfile_attempts):
+ unexpected_error("Giving up waiting for procmail lock '%s'"
+ % lock_name)
vprint("writing lockfile '%s'" % lock_name)
lock = open(lock_name, "w")
- stale.procmail_lock = lock_name
+ _stale.procmail_lock = lock_name
lock.close()
def procmail_unlock(self):
- """delete our procmail-style .lock file"""
- lock_name = self.file.name + options.lockfile_extension
+ """Delete the procmail lockfile on the 'mbox' mailbox"""
+ assert(self.mbox_file.name)
+ lock_name = self.mbox_file.name + _options.lockfile_extension
vprint("removing lockfile '%s'" % lock_name)
os.unlink(lock_name)
- stale.procmail_lock = None
+ _stale.procmail_lock = None
def leave_empty(self):
- """This should be the same as 'cp /dev/null mailbox'.
- This will leave a zero-length mailbox file so that mail
- reading programs don't get upset that the mailbox has been
- completely deleted."""
- vprint("turning '%s' into a zero-length file" % self.file.name)
- atime = os.path.getatime(self.file.name)
- mtime = os.path.getmtime(self.file.name)
- blank_file = open(self.file.name, "w")
+ """Replace the 'mbox' mailbox with a zero-length file.
+ This should be the same as 'cp /dev/null mailbox'.
+ This will leave a zero-length mailbox file so that mail
+ reading programs don't get upset that the mailbox has been
+ completely deleted."""
+ assert(os.path.isfile(self.mbox_file.name))
+ vprint("turning '%s' into a zero-length file" % self.mbox_file.name)
+ atime = os.path.getatime(self.mbox_file.name)
+ mtime = os.path.getmtime(self.mbox_file.name)
+ blank_file = open(self.mbox_file.name, "w")
blank_file.close()
- os.utime(self.file.name, (atime, mtime)) # reset to original timestamps
+ os.utime(self.mbox_file.name, (atime, mtime)) # to original timestamps
+
+class RetainMbox(Mbox):
+ """Class for holding messages that will be retained from the original
+ mailbox (ie. the messages are not considered 'old'). Extends the 'Mbox'
+ class. This 'mbox' file starts off as a temporary file but will eventually
+ overwrite the original mailbox if everything is OK.
+
+ """
+ __final_name = None
+ def __init__(self, final_name):
+ """Constructor - create a temporary file for the mailbox.
+
+ Arguments:
+ final_name -- the name of the original mailbox that this mailbox
+ will replace when we call finalise()
-class RetainMailbox(Mailbox):
- """a temporary mailbox for holding messages that will be retained in the
- original mailbox"""
- def __init__(self):
- """constructor - create the temporary file"""
+ """
+ assert(final_name)
temp_name = tempfile.mktemp("archivemail_retain")
- self.file = open(temp_name, "w")
- stale.retain = temp_name
- vprint("opened temporary retain file '%s'" % self.file.name)
+ self.mbox_file = open(temp_name, "w")
+ _stale.retain = temp_name
+ vprint("opened temporary retain file '%s'" % self.mbox_file.name)
+ self.__final_name = final_name
- def finalise(self, final_name):
- """constructor - create the temporary file"""
+ def finalise(self):
+ """Overwrite the original mailbox with this temporary mailbox."""
+ assert(self.__final_name)
self.close()
-
- atime = os.path.getatime(final_name)
- mtime = os.path.getmtime(final_name)
-
- vprint("renaming '%s' to '%s'" % (self.file.name, final_name))
- os.rename(self.file.name, final_name)
-
- os.utime(final_name, (atime, mtime)) # reset to original timestamps
- stale.retain = None
+ atime = os.path.getatime(self.__final_name)
+ mtime = os.path.getmtime(self.__final_name)
+ vprint("renaming '%s' to '%s'" % (self.mbox_file.name, self.__final_name))
+ os.rename(self.mbox_file.name, self.__final_name)
+ os.utime(self.__final_name, (atime, mtime)) # reset to original timestamps
+ _stale.retain = None
def unlink(self):
- """Override the base-class version, removing from stalefiles"""
- Mailbox.unlink(self)
- stale.retain = None
+ """Delete this temporary mailbox. Overrides Mbox.unlink()"""
+ Mbox.unlink(self)
+ _stale.retain = None
+
+class ArchiveMbox(Mbox):
+ """Class for holding messages that will be archived from the original
+ mailbox (ie. the messages that are considered 'old'). Extends the 'Mbox'
+ class. This 'mbox' file starts off as a temporary file, extracted from any
+ pre-existing archive. It will eventually overwrite the original archive
+ mailbox if everything is OK.
+
+ """
+ __final_name = None
-class ArchiveMailbox(Mailbox):
- """all messages that are too old go here"""
- final_name = None # this is
def __init__(self, final_name):
- """copy any pre-existing compressed archive to a temp file which we
- use as the new soon-to-be compressed archive"""
+ """Constructor -- extract any pre-existing compressed archive to a
+ temporary file which we use as the new 'mbox' archive for this
+ mailbox.
+
+ Arguments:
+ final_name -- the final name for this archive mailbox. This function
+ will check to see if the filename already exists, and
+ extract it to a temporary file if it does. It will also
+ rename itself to this name when we call finalise()
+
+ """
assert(final_name)
- compressor = options.compressor
- compressedfilename = final_name + options.compressor_extension
+ compressor = _options.compressor
+ compressedfilename = final_name + _options.compressor_extension
if os.path.isfile(final_name):
- user_error("There is already a file named '%s'!" % (final_name))
+ unexpected_error("There is already a file named '%s'!" %
+ final_name)
temp_name = tempfile.mktemp("archivemail_archive")
@@ -300,118 +398,172 @@ class ArchiveMailbox(Mailbox):
uncompress = "%s -d -c %s > %s" % (compressor,
compressedfilename, temp_name)
vprint("running uncompressor: %s" % uncompress)
- stale.archive = temp_name
+ _stale.archive = temp_name
system_or_die(uncompress)
- stale.archive = temp_name
- self.file = open(temp_name, "a")
- self.final_name = final_name
+ _stale.archive = temp_name
+ self.mbox_file = open(temp_name, "a")
+ self.__final_name = final_name
def finalise(self):
- """rename the temp file back to the original compressed archive
- file"""
+ """Compress the archive and rename this archive temporary file to the
+ final archive filename, overwriting any pre-existing archive if it
+ exists.
+
+ """
+ assert(self.__final_name)
self.close()
- compressor = options.compressor
- compressed_archive_name = self.file.name + options.compressor_extension
- compress = compressor + " " + self.file.name
+ compressor = _options.compressor
+ compressed_archive_name = self.mbox_file.name + \
+ _options.compressor_extension
+ compress = compressor + " " + self.mbox_file.name
vprint("running compressor: '%s'" % compress)
-
- stale.compressed_archive = compressed_archive_name
+ _stale.compressed_archive = compressed_archive_name
system_or_die(compress)
- stale.archive = None
-
- compressed_final_name = self.final_name + options.compressor_extension
+ _stale.archive = None
+ compressed_final_name = self.__final_name + _options.compressor_extension
vprint("renaming '%s' to '%s'" % (compressed_archive_name,
compressed_final_name))
os.rename(compressed_archive_name, compressed_final_name)
- stale.compressed_archive = None
+ _stale.compressed_archive = None
-class OriginalMailbox(Mailbox):
- """This is the mailbox that we read messages from to determine if they are
- too old. We will never write to this file directly except at the end
- where we override the whole file with the RetainMailbox."""
- file = None
+class IdentityCache:
+ seen_ids = {}
+ mailbox_name = None
def __init__(self, mailbox_name):
- """open the mailbox, ready for reading"""
- try:
- self.file = open(mailbox_name, "r")
- except IOError, msg:
- user_error(msg)
+ assert(mailbox_name)
+ self.mailbox_name = mailbox_name
+ def warn_if_dupe(self, msg):
+ message_id = msg.get('Message-ID')
+ if self.seen_ids.has_key(message_id):
+ user_warning("duplicate message id: '%s' in mailbox '%s'" %
+ (message_id, self.mailbox_name))
+ self.seen_ids[message_id] = 1
+
+
+# global class instances
+_options = Options() # the run-time options object
def main(args = sys.argv[1:]):
- global options
- global stale
+ global _stale
- options = Options()
usage = """Usage: %s [options] mailbox [mailbox...]
-Moves old mail messages in mbox-format mailboxes to compressed mailbox
-archives. This is useful for saving space and keeping your mailbox manageable.
- Options are as follows:
+Moves old mail messages in mbox or maildir-format mailboxes to compressed
+'mbox' mailbox archives. This is useful for saving space and keeping your
+mailbox manageable.
+
+Options are as follows:
-d, --days=<days> archive messages older than <days> days (default: %d)
- -s, --suffix=<name> suffix for archive filename (default: '%s')
- -z, --gzip compress the archive using gzip (default)
- -I, --bzip2 compress the archive using bzip2
- -Z, --compress compress the archive using compress
+ -s, --suffix=NAME suffix for archive filename (default: '%s')
+ -m, --modify-time use file last-modified time as date for maildir messages
+ -n, --dry-run don't write to anything - just show what would be done
+ -o, --output-dir=DIR directory where archive files go (default: current)
+ --ignore-dupe don't warn about mailboxes with duplicates messages
+ -z, --gzip compress the archive(s) using gzip (default)
+ -I, --bzip2 compress the archive(s) using bzip2
+ -Z, --compress compress the archive(s) using compress
--delete delete rather than archive old mail (use with caution!)
-v, --verbose report lots of extra debugging information
-q, --quiet quiet mode - print no statistics (suitable for crontab)
-V, --version display version information
-h, --help display this message
+
Example: %s linux-devel
- This will move all messages older than %s days to a file called
+ This will move all messages older than %s days to a 'mbox' mailbox called
'linux-devel_archive.gz', deleting them from the original 'linux-devel'
mailbox. If the 'linux-devel_archive.gz' mailbox already exists, the
newly archived messages are appended.
-""" % (options.script_name, options.days_old_max, options.archive_suffix,
- options.script_name, options.days_old_max)
+
+Report bugs to <paul@paulrodger.com>. """ % (_options.script_name,
+ _options.days_old_max, _options.archive_suffix, _options.script_name,
+ _options.days_old_max)
check_python_version()
- args = options.parse_args(args, usage)
+ args = _options.parse_args(args, usage)
if len(args) == 0:
print usage
sys.exit(1)
os.umask(077) # saves setting permissions on mailboxes/tempfiles
- stale = StaleFiles()
- atexit.register(clean_up)
- for filename in args:
- tempfile.tempdir = os.path.dirname(filename) # don't use /var/tmp
- final_archive_name = filename + options.archive_suffix
- archive_mailbox(mailbox_name = filename,
- final_archive_name = final_archive_name)
+ # Make sure we clean up nicely - we don't want to leave stale procmail
+ # lockfiles about if something bad happens to us. This is quite
+ # important, even though procmail will delete stale files after a while.
+ _stale = StaleFiles() # remember what we have to delete
+ atexit.register(clean_up) # delete stale files on exceptions/normal exit
+ signal.signal(signal.SIGHUP, clean_up_signal) # signal 1
+ # SIGINT (signal 2) is handled as a python exception
+ signal.signal(signal.SIGQUIT, clean_up_signal) # signal 3
+ signal.signal(signal.SIGTERM, clean_up_signal) # signal 15
+ for mailbox_path in args:
+ archive(mailbox_path)
######## errors and debug ##########
def vprint(string):
- """this saves putting 'if (verbose) print foo' everywhere"""
- if options.verbose:
+ """Print the string argument if we are in verbose mode"""
+ if _options.verbose:
print string
-def user_error(string):
- """fatal error, probably something the user did wrong"""
- script_name = options.script_name
- message = "%s: %s\n" % (script_name, string)
+def unexpected_error(string):
+ """Print the string argument, a 'shutting down' message and abort -
+ this function never returns"""
+ sys.stderr.write("%s: %s\n" % (_options.script_name, string))
+ sys.stderr.write("%s: unexpected error encountered - shutting down\n" %
+ _options.script_name)
+ sys.exit(1)
- sys.stderr.write(message)
+
+def user_error(string):
+ """Print the string argument and abort - this function never returns"""
+ sys.stderr.write("%s: %s\n" % (_options.script_name, string))
sys.exit(1)
+
+def user_warning(string):
+ """Print the string argument"""
+ sys.stderr.write("%s: Warning - %s\n" % (_options.script_name, string))
+
########### operations on a message ############
-def is_too_old(message):
- """return true if a message is too old (and should be archived),
- false otherwise"""
+def make_mbox_from(message):
+ """Return a string suitable for use as a 'From_' mbox header for the
+ message.
+
+ Arguments:
+ message -- the rfc822 message object
+
+ """
+ assert(message)
+ address_header = message.get('Return-path')
+ if not address_header:
+ vprint("make_mbox_from: no Return-path -- using 'From:' instead!")
+ address_header = message.get('From')
+ (name, address) = rfc822.parseaddr(address_header)
+ date = rfc822.parsedate(message.get('Delivery-date'))
+ if not date:
+ date = rfc822.parsedate(message.get('Date'))
+ date_string = time.asctime(date)
+ mbox_from = "From %s %s\n" % (address, date_string)
+ return mbox_from
+
+def get_date_mtime(message):
+ """Return the delivery date of an rfc822 message in a maildir mailbox"""
+ vprint("using last-modification time of message file")
+ return os.path.getmtime(message.fp.name)
+
+def get_date_headers(message):
+ """Return the delivery date of an rfc822 message in a mbox mailbox"""
date = message.getdate('Date')
delivery_date = message.getdate('Delivery-date')
use_date = None
time_message = None
-
if delivery_date:
try:
time_message = time.mktime(delivery_date)
@@ -427,20 +579,27 @@ def is_too_old(message):
except ValueError:
pass
if not use_date:
- print message
- vprint("no valid dates found for message")
- return 0
+ unexpected_error("no valid dates found for message")
+ return time_message
+
+def is_too_old(time_message):
+ """Return true if a message is too old (and should be archived),
+ false otherwise.
+
+ Arguments:
+ time_message -- the delivery date of the message measured in seconds
+ since the epoch
+
+ """
+ assert(time_message)
time_now = time.time()
if time_message > time_now:
- time_string = time.asctime(use_date)
- vprint("warning: message has date in the future: %s !" % time_string)
+ vprint("warning: message has date in the future")
return 0
-
- secs_old_max = (options.days_old_max * 24 * 60 * 60)
+ secs_old_max = (_options.days_old_max * 24 * 60 * 60)
days_old = (time_now - time_message) / 24 / 60 / 60
vprint("message is %.2f days old" % days_old)
-
if ((time_message + secs_old_max) < time_now):
return 1
return 0
@@ -448,120 +607,222 @@ def is_too_old(message):
############### mailbox operations ###############
-def archive_mailbox(mailbox_name, final_archive_name):
- """process and archive the given mailbox name"""
+def archive(mailbox_name):
+ """Archives a mailbox.
+
+ Arguments:
+ mailbox_name -- the filename/dirname of the mailbox to be archived
+ final_archive_name -- the filename of the 'mbox' mailbox to archive
+ old messages to - appending if the archive
+ already exists
+
+ """
+ tempfile.tempdir = choose_temp_dir(mailbox_name)
+ vprint("set tempfile directory to '%s'" % tempfile.tempdir)
+
+ final_archive_name = mailbox_name + _options.archive_suffix
+ if _options.output_dir:
+ final_archive_name = os.path.join(_options.output_dir,
+ final_archive_name)
+ vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name))
+
+ if os.path.islink(mailbox_name):
+ unexpected_error("'%s' is a symbolic link -- I am nervous" %
+ mailbox_name)
+ elif os.path.isfile(mailbox_name):
+ vprint("guessing mailbox is of type: mbox")
+ _archive_mbox(mailbox_name, final_archive_name)
+ elif os.path.isdir(mailbox_name):
+ cur_path = os.path.join(mailbox_name, "cur")
+ new_path = os.path.join(mailbox_name, "new")
+ if os.path.isdir(cur_path) and os.path.isdir(new_path):
+ vprint("guessing mailbox is of type: maildir")
+ _archive_maildir(mailbox_name, final_archive_name)
+ else:
+ vprint("guessing mailbox is of type: MH")
+ _archive_mh(mailbox_name, final_archive_name)
+ else:
+ user_error("'%s': no such file or directory" % mailbox_name)
+
+
+def _archive_mbox(mailbox_name, final_archive_name):
+ """Archive a 'mbox' style mailbox - used by archive_mailbox()
+
+ Arguments:
+ mailbox_name -- the filename/dirname of the mailbox to be archived
+ final_archive_name -- the filename of the 'mbox' mailbox to archive
+ old messages to - appending if the archive
+ already exists
+ """
archive = None
retain = None
-
- vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name))
stats = Stats(mailbox_name, final_archive_name)
-
- original = OriginalMailbox(mailbox_name)
- if original.get_size() == 0:
- original.close()
- vprint("skipping '%s' because it is a zero-length file" %
- original.file.name)
- if not options.quiet:
- stats.display()
- return
+ original = Mbox(mailbox_name)
+ cache = IdentityCache(mailbox_name)
original.procmail_lock()
original.exclusive_lock()
-
- msg = original.read_message()
- if not msg:
- user_error("file '%s' is not in 'mbox' format" % mailbox.file.name)
-
+ msg = original.next()
while (msg):
stats.another_message()
- message_id = msg.get('Message-ID')
- vprint("processing message '%s'" % message_id)
- if is_too_old(msg):
+ vprint("processing message '%s'" % msg.get('Message-ID'))
+ if _options.warn_duplicates:
+ cache.warn_if_dupe(msg)
+ time_message = get_date_headers(msg)
+ if is_too_old(time_message):
stats.another_archived()
- if options.delete_old_mail:
+ if _options.delete_old_mail:
vprint("decision: delete message")
else:
vprint("decision: archive message")
- if (not archive):
- archive = ArchiveMailbox(final_archive_name)
- archive.store(msg)
+ if not _options.dry_run:
+ if (not archive):
+ archive = ArchiveMbox(final_archive_name)
+ archive.write(msg)
else:
vprint("decision: retain message")
- if (not retain):
- retain = RetainMailbox()
- retain.store(msg)
- msg = original.read_message()
+ if not _options.dry_run:
+ if (not retain):
+ retain = RetainMbox(mailbox_name)
+ retain.write(msg)
+ msg = original.next()
vprint("finished reading messages")
-
original.exclusive_unlock()
original.close()
+ if not _options.dry_run:
+ if retain: retain.close()
+ if archive: archive.close()
+ if _options.delete_old_mail:
+ # we will never have an archive file
+ if retain:
+ retain.finalise(mailbox_name)
+ else:
+ # nothing was retained - everything was deleted
+ original.leave_empty()
+ elif archive:
+ archive.finalise()
+ if retain:
+ retain.finalise()
+ else:
+ # nothing was retained - everything was deleted
+ original.leave_empty()
+ else:
+ # There was nothing to archive
+ if retain:
+ # retain will be the same as original mailbox
+ retain.unlink()
+ original.procmail_unlock()
+ if not _options.quiet:
+ stats.display()
+
- if options.delete_old_mail:
- # we will never have an archive file
- if retain:
- retain.finalise(mailbox_name)
+def _archive_maildir(mailbox_name, final_archive_name):
+ """Archive a 'maildir' style mailbox - used by archive_mailbox()"""
+ archive = None
+ stats = Stats(mailbox_name, final_archive_name)
+ original = mailbox.Maildir(mailbox_name)
+ cache = IdentityCache(mailbox_name)
+ assert(original)
+ msg = original.next()
+ assert(msg)
+ delete_queue = []
+ while (msg):
+ stats.another_message()
+ vprint("processing message '%s'" % msg.get('Message-ID'))
+ if _options.warn_duplicates:
+ cache.warn_if_dupe(msg)
+ if _options.use_modify_time:
+ time_message = get_date_mtime(msg)
else:
- original.leave_empty()
- elif archive:
- archive.finalise()
- if retain:
- retain.finalise(mailbox_name)
+ time_message = get_date_headers(msg)
+ if is_too_old(time_message):
+ stats.another_archived()
+ if _options.delete_old_mail:
+ vprint("decision: delete message")
+ else:
+ vprint("decision: archive message")
+ if not _options.dry_run:
+ if (not archive):
+ archive = ArchiveMbox(final_archive_name)
+ archive.write(msg)
+ if not _options.dry_run: delete_queue.append(msg.fp.name)
else:
- original.leave_empty()
- else:
- # There was nothing to archive
- if retain:
- # retain will be the same as original mailbox -- no point copying
- retain.close()
- retain.unlink()
-
- original.procmail_unlock()
- if not options.quiet:
+ vprint("decision: retain message")
+ msg = original.next()
+ vprint("finished reading messages")
+ if not _options.dry_run:
+ if archive:
+ archive.close()
+ archive.finalise()
+ for file_name in delete_queue:
+ try:
+ os.unlink(file_name)
+ except (OSError), msg:
+ # This could happen -- a person could be deleting messages
+ # with a mail reader while this script is running. That
+ # should be ok. How about permission denied problems though?
+ if not _options.quiet:
+ print "unlink warning: %s" % msg
+ if not _options.quiet:
stats.display()
+def _archive_mh(mailbox_name, final_archive_name):
+ """Archive a 'MH' style mailbox - see archive_mailbox()"""
+ unexpected_error("'MH' type mailbox support not yet implemented")
############### misc functions ###############
def clean_up():
- """This is run on exit to make sure we haven't left any stale
- files/lockfiles left on the system"""
+ """Delete stale files -- to be registered with atexit.register()"""
vprint("cleaning up ...")
- if stale.procmail_lock:
- vprint("removing stale procmail lock '%s'" % stale.procmail_lock)
- try: os.unlink(stale.procmail_lock)
- except (IOError, OSError): pass
- if stale.retain:
- vprint("removing stale retain file '%s'" % stale.retain)
- try: os.unlink(stale.retain)
- except (IOError, OSError): pass
- if stale.archive:
- vprint("removing stale archive file '%s'" % stale.archive)
- try: os.unlink(stale.archive)
- except (IOError, OSError): pass
- if stale.compressed_archive:
- vprint("removing stale compressed archive file '%s'" %
- stale.compressed_archive)
- try: os.unlink(stale.compressed_archive)
- except (IOError, OSError): pass
+ _stale.clean()
+
+
+def clean_up_signal(signal_number, stack_frame):
+ """Delete stale files -- to be registered as a signal handler.
+
+ Arguments:
+ signal_number -- signal number of the terminating signal
+ stack_frame -- the current stack frame
+
+ """
+ # this will run the above clean_up(), since unexpected_error()
+ # will abort with sys.exit() and clean_up will be registered
+ # at this stage
+ unexpected_error("received signal %s" % signal_number)
+
+
+def choose_temp_dir(mailbox_path):
+ """Set the directory for temporary files to something safe.
+
+ Arguments:
+ mailbox_path -- path name to the original mailbox
+ """
+ temp_dir = os.path.dirname(mailbox_path)
+ if _options.output_dir:
+ temp_dir = _options.output_dir
+ if not temp_dir:
+ temp_dir = os.curdir # use the current directory
+ return temp_dir
def check_python_version():
- """make sure we are running with the right version of python"""
+ """Abort if we are running on python < v2.0"""
build = sys.version
too_old_error = "requires python v2.0 or greater. Your version is: %s" % build
try:
version = sys.version_info # we might not even have this function! :)
if (version[0] < 2):
- UserError(too_old_error)
+ unexpected_error(too_old_error)
except: # I should be catching more specific exceptions
- UserError(too_old_error)
+ unexpected_error(too_old_error)
def system_or_die(command):
- """Give a user_error() if the command we ran returned a non-zero status"""
+ """Run the command with os.system(), aborting on non-zero exit"""
rv = os.system(command)
if (rv != 0):
status = os.WEXITSTATUS(rv)
- user_error("command '%s' returned status %d" % (command, status))
+ unexpected_error("command '%s' returned status %d" % (command, status))
# this is where it all happens, folks