diff options
| -rw-r--r-- | TODO | 10 | ||||
| -rwxr-xr-x | archivemail.py | 797 | 
2 files changed, 534 insertions, 273 deletions
| @@ -1,16 +1,16 @@ -add Maildir support +test exclusive locking works with another test process -add MH support - -start using private variables? +add MH mailbox support  finish man page  add option to archive depending on mailbox size threshold       + is this a good idea? +add option to archive depending on number of messages +    + is this a good idea? -perserve atime of mailbox properly +perserve atime of original mailbox properly  lock any original .gz files (?) diff --git a/archivemail.py b/archivemail.py index bae40a6..25354bc 100755 --- a/archivemail.py +++ b/archivemail.py @@ -1,4 +1,4 @@ -#!/usr/bin/python -tt +#! /usr/bin/env python  ############################################################################  # Copyright (C) 2002  Paul Rodger <paul@paulrodger.com>  # @@ -17,114 +17,170 @@  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  ############################################################################ -"""Archive and compress old mail in mbox-format mailboxes""" +""" +Archive and compress old mail in mbox or maildir-format mailboxes. +Website: http://archivemail.sourceforge.net/ +"""  import atexit  import fcntl  import getopt  import mailbox  import os -import re  import rfc822 +import signal  import string  import sys  import tempfile  import time -# globals  -VERSION = "archivemail v0.1.0" -COPYRIGHT = """Copyright (C) 2002  Paul Rodger <paul@paulrodger.com> +# global administrivia  +__version__ = "archivemail v0.10" +__rcs_id__ = "$Id$" +__copyright__ = """Copyright (C) 2002  Paul Rodger <paul@paulrodger.com>  This is free software; see the source for copying conditions. There is NO  warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.""" -options = None  # global instance of the run-time options class -stale = None    # list of files to delete on abnormal exit +_stale = None    # list of files to delete on abnormal exit  ############## class definitions ###############  class Stats: -    """collect and print statistics per mailbox""" -    archived = 0 -    mailbox_name = None -    archive_name = None -    start_time = 0 -    total = 0 +    """Class to collect and print statistics about mailbox archival""" +    __archived = 0 +    __mailbox_name = None +    __archive_name = None +    __start_time = 0 +    __total = 0      def __init__(self, mailbox_name, final_archive_name): -        """constructor for a new set of statistics - the mailbox names are -           only used for printing a friendly message""" -        self.start_time = time.time() -        self.mailbox_name = mailbox_name -        self.archive_name = final_archive_name + options.compressor_extension +        """Constructor for a new set of statistics. + +        Arguments:  +        mailbox_name -- filename/dirname of the original mailbox +        final_archive_name -- filename for the final 'mbox' archive, without +                              compression extension (eg .gz) + +        """ +        assert(mailbox_name) +        assert(final_archive_name) +        self.__start_time = time.time() +        self.__mailbox_name = mailbox_name +        self.__archive_name = final_archive_name + _options.compressor_extension      def another_message(self): -        self.total = self.total + 1 +        """Add one to the internal count of total messages processed""" +        self.__total = self.__total + 1      def another_archived(self): -        self.archived = self.archived + 1 +        """Add one to the internal count of messages archived""" +        self.__archived = self.__archived + 1      def display(self): -        """Display one line of archive statistics for the mailbox""" +        """Print statistics about how many messages were archived"""          end_time = time.time() -        time_seconds = end_time - self.start_time +        time_seconds = end_time - self.__start_time          action = "archived" -        if options.delete_old_mail: +        if _options.delete_old_mail:              action = "deleted" +        if _options.dry_run: +            action = "I would have " + action          print "%s: %s %d of %d message(s) in %.1f seconds" % \ -            (self.mailbox_name, action, self.archived, self.total, +            (self.__mailbox_name, action, self.__archived, self.__total,              time_seconds)  class StaleFiles: -    """container for remembering stale files to delete on abnormal exit""" +    """Class to keep track of files to be deleted on abnormal exit"""      archive            = None  # tempfile for messages to be archived      compressed_archive = None  # compressed version of the above      procmail_lock      = None  # original_mailbox.lock      retain             = None  # tempfile for messages to be retained +    def clean(self): +        """Delete any temporary files or lockfiles that exist""" +        if self.procmail_lock: +            vprint("removing stale procmail lock '%s'" % self.procmail_lock) +            try: os.unlink(self.procmail_lock) +            except (IOError, OSError): pass +        if self.retain: +            vprint("removing stale retain file '%s'" % self.retain) +            try: os.unlink(self.retain) +            except (IOError, OSError): pass +        if self.archive: +            vprint("removing stale archive file '%s'" % self.archive) +            try: os.unlink(self.archive) +            except (IOError, OSError): pass +        if self.compressed_archive: +            vprint("removing stale compressed archive file '%s'" % +                self.compressed_archive) +            try: os.unlink(self.compressed_archive) +            except (IOError, OSError): pass +  class Options: -    """container for storing and setting our runtime options""" +    """Class to store runtime options, including defaults"""      archive_suffix       = "_archive" +    warn_duplicates      = 1      compressor           = None      compressor_extension = None      days_old_max         = 180      delete_old_mail      = 0 -    lockfile_attempts    = 5     # 5 seconds of waiting +    dry_run              = 0 +    lockfile_attempts    = 5        lockfile_extension   = ".lock" +    lockfile_sleep       = 1  +    output_dir           = None      quiet                = 0      script_name          = os.path.basename(sys.argv[0]) +    use_modify_time      = 0      verbose              = 0      def parse_args(self, args, usage): -        """set our runtime options from the command-line arguments""" +        """Set our runtime options from the command-line arguments. + +        Arguments: +        args -- this is sys.argv[1:] +        usage -- a usage message to display on '--help' or bad arguments + +        Returns the remaining command-line arguments that have not yet been +        parsed as a string. + +        """          try: -            opts, args = getopt.getopt(args, '?IVZd:hqs:vz',  -                             ["bzip2", "compress", "days=", "delete", "gzip",  -                              "help", "quiet", "suffix", "verbose",  -                              "version"]) +            opts, args = getopt.getopt(args, '?IVZd:hmno:qs:vz',  +                             ["bzip2", "compress", "days=", "delete", +                             "dry-run", "gzip", "help", "output-dir=",  +                             "quiet", "suffix", "modify-time", "verbose",  +                             "version"])          except getopt.error, msg:              user_error(msg)          for o, a in opts:              if o == '--delete':                  self.delete_old_mail = 1 +            if o in ('-n', '--dry-run'): +                self.dry_run = 1              if o in ('-d', '--days'):                  self.days_old_max = string.atoi(a)                  if (self.days_old_max < 1):                      user_error("argument to -d must be greater than zero")                  if (self.days_old_max >= 10000):                      user_error("argument to -d must be less than 10000") +            if o in ('-o', '--output-dir'): +                self.output_dir = a              if o in ('-h', '-?', '--help'):                  print usage                  sys.exit(0)              if o in ('-q', '--quiet'):                  self.quiet = 1 +            if o in ('-m', '--modify-time'): +                self.use_modify_time = 1              if o in ('-v', '--verbose'):                  self.verbose = 1              if o in ('-s', '--suffix'):                  self.archive_suffix = a              if o in ('-V', '--version'): -                print VERSION + "\n\n" + COPYRIGHT +                print __version__ + "\n\n" + __copyright__                  sys.exit(0)              if o in ('-z', '--gzip'):                  if (self.compressor): @@ -149,149 +205,191 @@ class Options:          return args -class Mailbox: -    """ generic read/writable 'mbox' format mailbox file""" -    count = 0 -    file = None -    mbox = None +class Mbox(mailbox.PortableUnixMailbox): +    """Class that allows read/write access to a 'mbox' mailbox.  +    Subclasses the mailbox.PortableUnixMailbox class. +    """ +    +    mbox_file = None   # file handle for the mbox file + +    def __init__(self, path_name): +        """Constructor for opening an existing 'mbox' mailbox. +        Extends constructor for mailbox.PortableUnixMailbox() -    def __init__(self): -        """constructor: doesn't do much""" -        pass +        Arguments: +        path_name -- file name of the 'mbox' file to be opened -    def store(self, msg): -        """write one message to the mbox file""" -        vprint("saving message to file '%s'" % self.file.name) -        assert(msg.unixfrom) -        self.file.write(msg.unixfrom) +        """ +        assert(path_name) +        try: +            self.mbox_file = open(path_name, "r") +        except IOError, msg: +            unexpected_error(msg) +        mailbox.PortableUnixMailbox.__init__(self, self.mbox_file) + +    def write(self, msg): +        """Write a rfc822 message object to the 'mbox' mailbox. +        If the rfc822 has no Unix 'From_' line, then one is constructed +        from other headers in the message. + +        Arguments: +        msg -- rfc822 message object to be written + +        """ +        assert(msg) +        vprint("saving message to file '%s'" % self.mbox_file.name) +        unix_from = msg.unixfrom +        if not unix_from: +            unix_from = make_mbox_from(msg) +        self.mbox_file.write(unix_from)          assert(msg.headers) -        self.file.writelines(msg.headers) -        self.file.write("\n") +        self.mbox_file.writelines(msg.headers) +        self.mbox_file.write(os.linesep)          # The following while loop is about twice as fast in  -        # practice to 'self.file.writelines(msg.fp.readlines())' +        # practice to 'self.mbox_file.writelines(msg.fp.readlines())'          while 1:              body = msg.fp.read(8192)              if not body:                  break -            self.file.write(body) -        self.count = self.count + 1 +            self.mbox_file.write(body)      def unlink(self): -        """destroy the whole thing""" -        if self.file: -            file_name = self.file.name -            self.close() -            vprint("unlinking file '%s'" % self.file.name) -            os.unlink(file_name) - -    def get_size(self): -        """determine file size of this mbox file""" -        assert(self.file.name) -        return os.path.getsize(self.file.name) +        """Close and delete the 'mbox' mailbox file""" +        file_name = self.mbox_file.name +        self.close() +        vprint("unlinking file '%s'" % self.mbox_file.name) +        os.unlink(file_name) + +    def is_empty(self): +        """Return true if the 'mbox' file is empty, false otherwise""" +        return (os.path.getsize(self.mbox_file.name) == 0)      def close(self): -        """close the mbox file""" -        if not self.file.closed: -            vprint("closing file '%s'" % self.file.name) -            self.file.close() - -    def read_message(self): -        """read one rfc822 message object from the mbox file""" -        if not self.mbox: -            self.file.seek(0) -            self.mbox = mailbox.UnixMailbox(self.file) -            assert(self.mbox) -        message = self.mbox.next() -        return message +        """Close the mbox file""" +        if not self.mbox_file.closed: +            vprint("closing file '%s'" % self.mbox_file.name) +            self.mbox_file.close()      def exclusive_lock(self): -        """set an advisory lock on the whole mbox file""" -        vprint("obtaining exclusive lock on file '%s'" % self.file.name) -        fcntl.flock(self.file, fcntl.LOCK_EX) +        """Set an advisory lock on the 'mbox' mailbox""" +        vprint("obtaining exclusive lock on file '%s'" % self.mbox_file.name) +        fcntl.flock(self.mbox_file, fcntl.LOCK_EX)      def exclusive_unlock(self): -        """unset any advisory lock on the mbox file""" -        vprint("dropping exclusive lock on file '%s'" % self.file.name) -        fcntl.flock(self.file, fcntl.LOCK_UN) +        """Unset any advisory lock on the 'mbox' mailbox""" +        vprint("dropping exclusive lock on file '%s'" % self.mbox_file.name) +        fcntl.flock(self.mbox_file, fcntl.LOCK_UN)      def procmail_lock(self): -        """create a procmail-style .lock file to prevent clashes""" -        lock_name = self.file.name + options.lockfile_extension +        """Create a procmail lockfile on the 'mbox' mailbox""" +        lock_name = self.mbox_file.name + _options.lockfile_extension          attempt = 0          while os.path.isfile(lock_name):              vprint("lockfile '%s' exists - sleeping..." % lock_name) -            time.sleep(1) +            time.sleep(_options.lockfile_sleep)              attempt = attempt + 1 -            if (attempt >= options.lockfile_attempts): -                user_error("Giving up waiting for procmail lock '%s'" % lock_name) +            if (attempt >= _options.lockfile_attempts): +                unexpected_error("Giving up waiting for procmail lock '%s'"  +                    % lock_name)          vprint("writing lockfile '%s'" % lock_name)          lock = open(lock_name, "w") -        stale.procmail_lock = lock_name +        _stale.procmail_lock = lock_name          lock.close()      def procmail_unlock(self): -        """delete our procmail-style .lock file""" -        lock_name = self.file.name + options.lockfile_extension +        """Delete the procmail lockfile on the 'mbox' mailbox""" +        assert(self.mbox_file.name) +        lock_name = self.mbox_file.name + _options.lockfile_extension          vprint("removing lockfile '%s'" % lock_name)          os.unlink(lock_name) -        stale.procmail_lock = None +        _stale.procmail_lock = None      def leave_empty(self): -        """This should be the same as 'cp /dev/null mailbox'. -           This will leave a zero-length mailbox file so that mail -           reading programs don't get upset that the mailbox has been -           completely deleted.""" -        vprint("turning '%s' into a zero-length file" % self.file.name) -        atime = os.path.getatime(self.file.name) -        mtime = os.path.getmtime(self.file.name) -        blank_file = open(self.file.name, "w") +        """Replace the 'mbox' mailbox with a zero-length file. +        This should be the same as 'cp /dev/null mailbox'. +        This will leave a zero-length mailbox file so that mail +        reading programs don't get upset that the mailbox has been +        completely deleted.""" +        assert(os.path.isfile(self.mbox_file.name)) +        vprint("turning '%s' into a zero-length file" % self.mbox_file.name) +        atime = os.path.getatime(self.mbox_file.name) +        mtime = os.path.getmtime(self.mbox_file.name) +        blank_file = open(self.mbox_file.name, "w")          blank_file.close() -        os.utime(self.file.name, (atime, mtime)) # reset to original timestamps +        os.utime(self.mbox_file.name, (atime, mtime)) # to original timestamps + +class RetainMbox(Mbox): +    """Class for holding messages that will be retained from the original +    mailbox (ie. the messages are not considered 'old'). Extends the 'Mbox' +    class. This 'mbox' file starts off as a temporary file but will eventually +    overwrite the original mailbox if everything is OK.  +     +    """ +    __final_name = None +    def __init__(self, final_name): +        """Constructor - create a temporary file for the mailbox. +        +        Arguments: +        final_name -- the name of the original mailbox that this mailbox +                      will replace when we call finalise() -class RetainMailbox(Mailbox): -    """a temporary mailbox for holding messages that will be retained in the -       original mailbox""" -    def __init__(self): -        """constructor - create the temporary file""" +        """ +        assert(final_name)          temp_name = tempfile.mktemp("archivemail_retain") -        self.file = open(temp_name, "w") -        stale.retain = temp_name -        vprint("opened temporary retain file '%s'" % self.file.name) +        self.mbox_file = open(temp_name, "w") +        _stale.retain = temp_name +        vprint("opened temporary retain file '%s'" % self.mbox_file.name) +        self.__final_name = final_name -    def finalise(self, final_name): -        """constructor - create the temporary file""" +    def finalise(self): +        """Overwrite the original mailbox with this temporary mailbox.""" +        assert(self.__final_name)          self.close() - -        atime = os.path.getatime(final_name) -        mtime = os.path.getmtime(final_name) - -        vprint("renaming '%s' to '%s'" % (self.file.name, final_name)) -        os.rename(self.file.name, final_name) - -        os.utime(final_name, (atime, mtime)) # reset to original timestamps -        stale.retain = None +        atime = os.path.getatime(self.__final_name) +        mtime = os.path.getmtime(self.__final_name) +        vprint("renaming '%s' to '%s'" % (self.mbox_file.name, self.__final_name)) +        os.rename(self.mbox_file.name, self.__final_name) +        os.utime(self.__final_name, (atime, mtime)) # reset to original timestamps +        _stale.retain = None      def unlink(self): -        """Override the base-class version, removing from stalefiles""" -        Mailbox.unlink(self) -        stale.retain = None +        """Delete this temporary mailbox. Overrides Mbox.unlink()""" +        Mbox.unlink(self) +        _stale.retain = None + +class ArchiveMbox(Mbox): +    """Class for holding messages that will be archived from the original +    mailbox (ie. the messages that are considered 'old'). Extends the 'Mbox' +    class. This 'mbox' file starts off as a temporary file, extracted from any +    pre-existing archive. It will eventually overwrite the original archive +    mailbox if everything is OK.  +     +    """ +    __final_name = None  -class ArchiveMailbox(Mailbox): -    """all messages that are too old go here""" -    final_name = None # this is       def __init__(self, final_name): -        """copy any pre-existing compressed archive to a temp file which we  -           use as the new soon-to-be compressed archive""" +        """Constructor -- extract any pre-existing compressed archive to a +        temporary file which we use as the new 'mbox' archive for this +        mailbox.  +        +        Arguments: +        final_name -- the final name for this archive mailbox. This function +                      will check to see if the filename already exists, and +                      extract it to a temporary file if it does. It will also +                      rename itself to this name when we call finalise() + +        """          assert(final_name) -        compressor = options.compressor -        compressedfilename = final_name + options.compressor_extension +        compressor = _options.compressor +        compressedfilename = final_name + _options.compressor_extension          if os.path.isfile(final_name): -            user_error("There is already a file named '%s'!" % (final_name)) +            unexpected_error("There is already a file named '%s'!" %  +                final_name)          temp_name = tempfile.mktemp("archivemail_archive") @@ -300,118 +398,172 @@ class ArchiveMailbox(Mailbox):              uncompress =  "%s -d -c %s > %s" % (compressor,                   compressedfilename, temp_name)              vprint("running uncompressor: %s" % uncompress) -            stale.archive = temp_name +            _stale.archive = temp_name              system_or_die(uncompress) -        stale.archive = temp_name -        self.file = open(temp_name, "a") -        self.final_name = final_name +        _stale.archive = temp_name +        self.mbox_file = open(temp_name, "a") +        self.__final_name = final_name      def finalise(self): -        """rename the temp file back to the original compressed archive -           file""" +        """Compress the archive and rename this archive temporary file to the +        final archive filename, overwriting any pre-existing archive if it +        exists. + +        """ +        assert(self.__final_name)          self.close() -        compressor = options.compressor -        compressed_archive_name = self.file.name + options.compressor_extension -        compress = compressor + " " + self.file.name +        compressor = _options.compressor +        compressed_archive_name = self.mbox_file.name +  \ +            _options.compressor_extension +        compress = compressor + " " + self.mbox_file.name          vprint("running compressor: '%s'" % compress) - -        stale.compressed_archive = compressed_archive_name +        _stale.compressed_archive = compressed_archive_name          system_or_die(compress) -        stale.archive = None - -        compressed_final_name = self.final_name + options.compressor_extension +        _stale.archive = None +        compressed_final_name = self.__final_name + _options.compressor_extension          vprint("renaming '%s' to '%s'" % (compressed_archive_name,               compressed_final_name))          os.rename(compressed_archive_name, compressed_final_name) -        stale.compressed_archive = None +        _stale.compressed_archive = None -class OriginalMailbox(Mailbox): -    """This is the mailbox that we read messages from to determine if they are -       too old. We will never write to this file directly except at the end -       where we override the whole file with the RetainMailbox.""" -    file = None +class IdentityCache: +    seen_ids = {} +    mailbox_name = None      def __init__(self, mailbox_name): -        """open the mailbox, ready for reading""" -        try: -            self.file = open(mailbox_name, "r") -        except IOError, msg: -            user_error(msg) +        assert(mailbox_name) +        self.mailbox_name = mailbox_name +    def warn_if_dupe(self, msg): +        message_id = msg.get('Message-ID') +        if self.seen_ids.has_key(message_id): +            user_warning("duplicate message id: '%s' in mailbox '%s'" %  +                (message_id, self.mailbox_name)) +        self.seen_ids[message_id] = 1 + + +# global class instances +_options = Options()  # the run-time options object  def main(args = sys.argv[1:]): -    global options -    global stale +    global _stale -    options = Options()      usage = """Usage: %s [options] mailbox [mailbox...] -Moves old mail messages in mbox-format mailboxes to compressed mailbox -archives. This is useful for saving space and keeping your mailbox manageable. -  Options are as follows: +Moves old mail messages in mbox or maildir-format mailboxes to compressed +'mbox' mailbox archives. This is useful for saving space and keeping your +mailbox manageable. + +Options are as follows:    -d, --days=<days>    archive messages older than <days> days (default: %d) -  -s, --suffix=<name>  suffix for archive filename (default: '%s') -  -z, --gzip           compress the archive using gzip (default)  -  -I, --bzip2          compress the archive using bzip2 -  -Z, --compress       compress the archive using compress +  -s, --suffix=NAME    suffix for archive filename (default: '%s') +  -m, --modify-time    use file last-modified time as date for maildir messages +  -n, --dry-run        don't write to anything - just show what would be done +  -o, --output-dir=DIR directory where archive files go (default: current) +      --ignore-dupe    don't warn about mailboxes with duplicates messages +  -z, --gzip           compress the archive(s) using gzip (default)  +  -I, --bzip2          compress the archive(s) using bzip2 +  -Z, --compress       compress the archive(s) using compress        --delete         delete rather than archive old mail (use with caution!)    -v, --verbose        report lots of extra debugging information    -q, --quiet          quiet mode - print no statistics (suitable for crontab)    -V, --version        display version information    -h, --help           display this message +  Example: %s linux-devel -  This will move all messages older than %s days to a file called  +  This will move all messages older than %s days to a 'mbox' mailbox called     'linux-devel_archive.gz', deleting them from the original 'linux-devel'    mailbox. If the 'linux-devel_archive.gz' mailbox already exists, the     newly archived messages are appended. -""" % (options.script_name, options.days_old_max, options.archive_suffix,  -       options.script_name, options.days_old_max) + +Report bugs to <paul@paulrodger.com>. """ %  (_options.script_name,  +    _options.days_old_max, _options.archive_suffix, _options.script_name,  +    _options.days_old_max)      check_python_version() -    args = options.parse_args(args, usage) +    args = _options.parse_args(args, usage)      if len(args) == 0:          print usage          sys.exit(1)      os.umask(077) # saves setting permissions on mailboxes/tempfiles -    stale = StaleFiles() -    atexit.register(clean_up) -    for filename in args: -        tempfile.tempdir = os.path.dirname(filename) # don't use /var/tmp -        final_archive_name = filename + options.archive_suffix -        archive_mailbox(mailbox_name = filename,  -                        final_archive_name = final_archive_name) +    # Make sure we clean up nicely - we don't want to leave stale procmail +    # lockfiles about if something bad happens to us. This is quite  +    # important, even though procmail will delete stale files after a while. +    _stale = StaleFiles() # remember what we have to delete +    atexit.register(clean_up) # delete stale files on exceptions/normal exit +    signal.signal(signal.SIGHUP, clean_up_signal)   # signal 1 +    # SIGINT (signal 2) is handled as a python exception +    signal.signal(signal.SIGQUIT, clean_up_signal)  # signal 3 +    signal.signal(signal.SIGTERM, clean_up_signal)  # signal 15 +    for mailbox_path in args: +        archive(mailbox_path)  ######## errors and debug ##########  def vprint(string): -    """this saves putting 'if (verbose) print foo' everywhere""" -    if options.verbose: +    """Print the string argument if we are in verbose mode""" +    if _options.verbose:          print string -def user_error(string): -    """fatal error, probably something the user did wrong""" -    script_name = options.script_name -    message = "%s: %s\n" % (script_name, string) +def unexpected_error(string): +    """Print the string argument, a 'shutting down' message and abort -  +    this function never returns""" +    sys.stderr.write("%s: %s\n" % (_options.script_name, string)) +    sys.stderr.write("%s: unexpected error encountered - shutting down\n" %  +        _options.script_name) +    sys.exit(1) -    sys.stderr.write(message) + +def user_error(string): +    """Print the string argument and abort - this function never returns""" +    sys.stderr.write("%s: %s\n" % (_options.script_name, string))      sys.exit(1) + +def user_warning(string): +    """Print the string argument""" +    sys.stderr.write("%s: Warning - %s\n" % (_options.script_name, string)) +  ########### operations on a message ############ -def is_too_old(message): -    """return true if a message is too old (and should be archived),  -       false otherwise""" +def make_mbox_from(message): +    """Return a string suitable for use as a 'From_' mbox header for the +    message. + +    Arguments: +    message -- the rfc822 message object + +    """ +    assert(message) +    address_header = message.get('Return-path') +    if not address_header: +        vprint("make_mbox_from: no Return-path -- using 'From:' instead!") +        address_header = message.get('From') +    (name, address) = rfc822.parseaddr(address_header) +    date = rfc822.parsedate(message.get('Delivery-date')) +    if not date: +        date = rfc822.parsedate(message.get('Date')) +    date_string = time.asctime(date) +    mbox_from = "From %s %s\n" % (address, date_string) +    return mbox_from + +def get_date_mtime(message): +    """Return the delivery date of an rfc822 message in a maildir mailbox"""  +    vprint("using last-modification time of message file") +    return os.path.getmtime(message.fp.name) + +def get_date_headers(message): +    """Return the delivery date of an rfc822 message in a mbox mailbox"""       date = message.getdate('Date')      delivery_date = message.getdate('Delivery-date')      use_date = None      time_message = None -      if delivery_date:          try:              time_message = time.mktime(delivery_date) @@ -427,20 +579,27 @@ def is_too_old(message):          except ValueError:              pass      if not use_date: -        print message -        vprint("no valid dates found for message") -        return 0   +        unexpected_error("no valid dates found for message") +    return time_message + +def is_too_old(time_message): +    """Return true if a message is too old (and should be archived),  +    false otherwise. + +    Arguments: +    time_message -- the delivery date of the message measured in seconds +                    since the epoch +        +    """ +    assert(time_message)      time_now = time.time()      if time_message > time_now: -        time_string = time.asctime(use_date) -        vprint("warning: message has date in the future: %s !" % time_string) +        vprint("warning: message has date in the future")          return 0 - -    secs_old_max = (options.days_old_max * 24 * 60 * 60) +    secs_old_max = (_options.days_old_max * 24 * 60 * 60)      days_old = (time_now - time_message) / 24 / 60 / 60      vprint("message is %.2f days old" % days_old) -      if ((time_message + secs_old_max) < time_now):          return 1      return 0 @@ -448,120 +607,222 @@ def is_too_old(message):  ###############  mailbox operations ############### -def archive_mailbox(mailbox_name, final_archive_name): -    """process and archive the given mailbox name""" +def archive(mailbox_name): +    """Archives a mailbox. + +    Arguments: +    mailbox_name -- the filename/dirname of the mailbox to be archived +    final_archive_name -- the filename of the 'mbox' mailbox to archive +                          old messages to - appending if the archive  +                          already exists + +    """ +    tempfile.tempdir = choose_temp_dir(mailbox_name) +    vprint("set tempfile directory to '%s'" % tempfile.tempdir) + +    final_archive_name = mailbox_name + _options.archive_suffix +    if _options.output_dir: +        final_archive_name = os.path.join(_options.output_dir,  +            final_archive_name) +    vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name)) + +    if os.path.islink(mailbox_name): +        unexpected_error("'%s' is a symbolic link -- I am nervous" %  +            mailbox_name) +    elif os.path.isfile(mailbox_name): +        vprint("guessing mailbox is of type: mbox") +        _archive_mbox(mailbox_name, final_archive_name) +    elif os.path.isdir(mailbox_name): +        cur_path = os.path.join(mailbox_name, "cur") +        new_path = os.path.join(mailbox_name, "new") +        if os.path.isdir(cur_path) and os.path.isdir(new_path): +            vprint("guessing mailbox is of type: maildir") +            _archive_maildir(mailbox_name, final_archive_name) +        else: +            vprint("guessing mailbox is of type: MH") +            _archive_mh(mailbox_name, final_archive_name) +    else: +        user_error("'%s': no such file or directory" % mailbox_name) + + +def _archive_mbox(mailbox_name, final_archive_name): +    """Archive a 'mbox' style mailbox - used by archive_mailbox() + +    Arguments: +    mailbox_name -- the filename/dirname of the mailbox to be archived +    final_archive_name -- the filename of the 'mbox' mailbox to archive +                          old messages to - appending if the archive  +                          already exists +    """      archive = None      retain = None -     -    vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name))      stats = Stats(mailbox_name, final_archive_name) - -    original = OriginalMailbox(mailbox_name) -    if original.get_size() == 0: -        original.close() -        vprint("skipping '%s' because it is a zero-length file" %  -            original.file.name) -        if not options.quiet: -            stats.display() -        return +    original = Mbox(mailbox_name) +    cache = IdentityCache(mailbox_name)      original.procmail_lock()      original.exclusive_lock() - -    msg = original.read_message() -    if not msg: -       user_error("file '%s' is not in 'mbox' format" % mailbox.file.name)  - +    msg = original.next()      while (msg):          stats.another_message() -        message_id = msg.get('Message-ID') -        vprint("processing message '%s'" % message_id) -        if is_too_old(msg): +        vprint("processing message '%s'" % msg.get('Message-ID')) +        if _options.warn_duplicates: +            cache.warn_if_dupe(msg)              +        time_message = get_date_headers(msg) +        if is_too_old(time_message):              stats.another_archived() -            if options.delete_old_mail: +            if _options.delete_old_mail:                  vprint("decision: delete message")              else:                  vprint("decision: archive message") -                if (not archive): -                    archive = ArchiveMailbox(final_archive_name) -                archive.store(msg) +                if not _options.dry_run: +                    if (not archive): +                        archive = ArchiveMbox(final_archive_name) +                    archive.write(msg)          else:              vprint("decision: retain message") -            if (not retain): -                retain = RetainMailbox() -            retain.store(msg) -        msg = original.read_message() +            if not _options.dry_run: +                if (not retain): +                    retain = RetainMbox(mailbox_name) +                retain.write(msg) +        msg = original.next()      vprint("finished reading messages")  -      original.exclusive_unlock()      original.close() +    if not _options.dry_run: +        if retain: retain.close() +        if archive: archive.close() +        if _options.delete_old_mail: +            # we will never have an archive file +            if retain: +                retain.finalise(mailbox_name) +            else: +                # nothing was retained - everything was deleted +                original.leave_empty() +        elif archive: +            archive.finalise() +            if retain: +                retain.finalise() +            else: +                # nothing was retained - everything was deleted +                original.leave_empty() +        else: +            # There was nothing to archive +            if retain: +                # retain will be the same as original mailbox  +                retain.unlink() +    original.procmail_unlock() +    if not _options.quiet: +        stats.display() + -    if options.delete_old_mail: -        # we will never have an archive file -        if retain: -            retain.finalise(mailbox_name) +def _archive_maildir(mailbox_name, final_archive_name): +    """Archive a 'maildir' style mailbox - used by archive_mailbox()""" +    archive = None +    stats = Stats(mailbox_name, final_archive_name) +    original = mailbox.Maildir(mailbox_name) +    cache = IdentityCache(mailbox_name) +    assert(original) +    msg = original.next() +    assert(msg) +    delete_queue = [] +    while (msg): +        stats.another_message() +        vprint("processing message '%s'" % msg.get('Message-ID')) +        if _options.warn_duplicates: +            cache.warn_if_dupe(msg)              +        if _options.use_modify_time: +            time_message = get_date_mtime(msg)          else: -            original.leave_empty() -    elif archive: -        archive.finalise() -        if retain: -            retain.finalise(mailbox_name) +            time_message = get_date_headers(msg) +        if is_too_old(time_message): +            stats.another_archived() +            if _options.delete_old_mail: +                vprint("decision: delete message") +            else: +                vprint("decision: archive message") +                if not _options.dry_run: +                    if (not archive): +                        archive = ArchiveMbox(final_archive_name) +                    archive.write(msg) +            if not _options.dry_run: delete_queue.append(msg.fp.name)           else: -            original.leave_empty() -    else: -        # There was nothing to archive -        if retain: -            # retain will be the same as original mailbox -- no point copying -            retain.close() -            retain.unlink() - -    original.procmail_unlock() -    if not options.quiet: +            vprint("decision: retain message") +        msg = original.next() +    vprint("finished reading messages")  +    if not _options.dry_run: +        if archive: +            archive.close() +            archive.finalise() +        for file_name in delete_queue: +            try: +                os.unlink(file_name) +            except (OSError), msg: +                # This could happen -- a person could be deleting messages +                # with a mail reader while this script is running. That  +                # should be ok. How about permission denied problems though? +                if not _options.quiet: +                    print "unlink warning: %s" % msg +    if not _options.quiet:          stats.display() +def _archive_mh(mailbox_name, final_archive_name): +    """Archive a 'MH' style mailbox - see archive_mailbox()""" +    unexpected_error("'MH' type mailbox support not yet implemented")  ###############  misc  functions  ###############  def clean_up(): -    """This is run on exit to make sure we haven't left any stale -    files/lockfiles left on the system""" +    """Delete stale files -- to be registered with atexit.register()"""      vprint("cleaning up ...") -    if stale.procmail_lock: -        vprint("removing stale procmail lock '%s'" % stale.procmail_lock) -        try: os.unlink(stale.procmail_lock) -        except (IOError, OSError): pass -    if stale.retain: -        vprint("removing stale retain file '%s'" % stale.retain) -        try: os.unlink(stale.retain) -        except (IOError, OSError): pass -    if stale.archive: -        vprint("removing stale archive file '%s'" % stale.archive) -        try: os.unlink(stale.archive) -        except (IOError, OSError): pass -    if stale.compressed_archive: -        vprint("removing stale compressed archive file '%s'" % -            stale.compressed_archive) -        try: os.unlink(stale.compressed_archive) -        except (IOError, OSError): pass +    _stale.clean() + + +def clean_up_signal(signal_number, stack_frame): +    """Delete stale files -- to be registered as a signal handler. + +    Arguments: +    signal_number -- signal number of the terminating signal +    stack_frame -- the current stack frame +     +    """ +    # this will run the above clean_up(), since unexpected_error() +    # will abort with sys.exit() and clean_up will be registered  +    # at this stage +    unexpected_error("received signal %s" % signal_number) + + +def choose_temp_dir(mailbox_path): +    """Set the directory for temporary files to something safe. +     +    Arguments: +    mailbox_path -- path name to the original mailbox +    """ +    temp_dir = os.path.dirname(mailbox_path) +    if _options.output_dir: +        temp_dir = _options.output_dir +    if not temp_dir: +        temp_dir = os.curdir # use the current directory +    return temp_dir  def check_python_version(): -    """make sure we are running with the right version of python""" +    """Abort if we are running on python < v2.0"""      build = sys.version      too_old_error = "requires python v2.0 or greater. Your version is: %s" % build      try:           version = sys.version_info  # we might not even have this function! :)          if (version[0] < 2): -            UserError(too_old_error) +            unexpected_error(too_old_error)      except:  # I should be catching more specific exceptions -        UserError(too_old_error) +        unexpected_error(too_old_error)  def system_or_die(command): -    """Give a user_error() if the command we ran returned a non-zero status""" +    """Run the command with os.system(), aborting on non-zero exit"""      rv = os.system(command)      if (rv != 0):          status = os.WEXITSTATUS(rv) -        user_error("command '%s' returned status %d" % (command, status)) +        unexpected_error("command '%s' returned status %d" % (command, status))  # this is where it all happens, folks | 
