aboutsummaryrefslogtreecommitdiffstats
path: root/archivemail.py
diff options
context:
space:
mode:
authorPaul Rodger <paul@paulrodger.com>2002-03-26 03:53:09 +0000
committerPaul Rodger <paul@paulrodger.com>2002-03-26 03:53:09 +0000
commit902a81b4bcf1b133b434322370996c2cd30e0f26 (patch)
treeee7be222eff82fa0d9311774b0f59ab1e8607d6a /archivemail.py
parente7b0a0331febb79e5a6fa3964a02d54fc7307665 (diff)
downloadarchivemail-902a81b4bcf1b133b434322370996c2cd30e0f26.tar.gz
archivemail-902a81b4bcf1b133b434322370996c2cd30e0f26.tar.bz2
archivemail-902a81b4bcf1b133b434322370996c2cd30e0f26.zip
Initial revision
Diffstat (limited to 'archivemail.py')
-rwxr-xr-xarchivemail.py569
1 files changed, 569 insertions, 0 deletions
diff --git a/archivemail.py b/archivemail.py
new file mode 100755
index 0000000..bae40a6
--- /dev/null
+++ b/archivemail.py
@@ -0,0 +1,569 @@
+#!/usr/bin/python -tt
+############################################################################
+# Copyright (C) 2002 Paul Rodger <paul@paulrodger.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+############################################################################
+
+"""Archive and compress old mail in mbox-format mailboxes"""
+
+import atexit
+import fcntl
+import getopt
+import mailbox
+import os
+import re
+import rfc822
+import string
+import sys
+import tempfile
+import time
+
+# globals
+VERSION = "archivemail v0.1.0"
+COPYRIGHT = """Copyright (C) 2002 Paul Rodger <paul@paulrodger.com>
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."""
+
+options = None # global instance of the run-time options class
+stale = None # list of files to delete on abnormal exit
+
+############## class definitions ###############
+
+class Stats:
+ """collect and print statistics per mailbox"""
+ archived = 0
+ mailbox_name = None
+ archive_name = None
+ start_time = 0
+ total = 0
+
+ def __init__(self, mailbox_name, final_archive_name):
+ """constructor for a new set of statistics - the mailbox names are
+ only used for printing a friendly message"""
+ self.start_time = time.time()
+ self.mailbox_name = mailbox_name
+ self.archive_name = final_archive_name + options.compressor_extension
+
+ def another_message(self):
+ self.total = self.total + 1
+
+ def another_archived(self):
+ self.archived = self.archived + 1
+
+ def display(self):
+ """Display one line of archive statistics for the mailbox"""
+ end_time = time.time()
+ time_seconds = end_time - self.start_time
+ action = "archived"
+ if options.delete_old_mail:
+ action = "deleted"
+ print "%s: %s %d of %d message(s) in %.1f seconds" % \
+ (self.mailbox_name, action, self.archived, self.total,
+ time_seconds)
+
+
+class StaleFiles:
+ """container for remembering stale files to delete on abnormal exit"""
+ archive = None # tempfile for messages to be archived
+ compressed_archive = None # compressed version of the above
+ procmail_lock = None # original_mailbox.lock
+ retain = None # tempfile for messages to be retained
+
+
+class Options:
+ """container for storing and setting our runtime options"""
+ archive_suffix = "_archive"
+ compressor = None
+ compressor_extension = None
+ days_old_max = 180
+ delete_old_mail = 0
+ lockfile_attempts = 5 # 5 seconds of waiting
+ lockfile_extension = ".lock"
+ quiet = 0
+ script_name = os.path.basename(sys.argv[0])
+ verbose = 0
+
+ def parse_args(self, args, usage):
+ """set our runtime options from the command-line arguments"""
+ try:
+ opts, args = getopt.getopt(args, '?IVZd:hqs:vz',
+ ["bzip2", "compress", "days=", "delete", "gzip",
+ "help", "quiet", "suffix", "verbose",
+ "version"])
+ except getopt.error, msg:
+ user_error(msg)
+ for o, a in opts:
+ if o == '--delete':
+ self.delete_old_mail = 1
+ if o in ('-d', '--days'):
+ self.days_old_max = string.atoi(a)
+ if (self.days_old_max < 1):
+ user_error("argument to -d must be greater than zero")
+ if (self.days_old_max >= 10000):
+ user_error("argument to -d must be less than 10000")
+ if o in ('-h', '-?', '--help'):
+ print usage
+ sys.exit(0)
+ if o in ('-q', '--quiet'):
+ self.quiet = 1
+ if o in ('-v', '--verbose'):
+ self.verbose = 1
+ if o in ('-s', '--suffix'):
+ self.archive_suffix = a
+ if o in ('-V', '--version'):
+ print VERSION + "\n\n" + COPYRIGHT
+ sys.exit(0)
+ if o in ('-z', '--gzip'):
+ if (self.compressor):
+ user_error("conflicting compression options")
+ self.compressor = "gzip"
+ if o in ('-Z', '--compress'):
+ if (self.compressor):
+ user_error("conflicting compression options")
+ self.compressor = "compress"
+ if o in ('-I', '--bzip2'):
+ if (self.compressor):
+ user_error("conflicting compression options")
+ self.compressor = "bzip2"
+ if not self.compressor:
+ self.compressor = "gzip"
+ extensions = {
+ "compress" : ".Z",
+ "gzip" : ".gz",
+ "bzip2" : ".bz2",
+ }
+ self.compressor_extension = extensions[self.compressor]
+ return args
+
+
+class Mailbox:
+ """ generic read/writable 'mbox' format mailbox file"""
+ count = 0
+ file = None
+ mbox = None
+
+ def __init__(self):
+ """constructor: doesn't do much"""
+ pass
+
+ def store(self, msg):
+ """write one message to the mbox file"""
+ vprint("saving message to file '%s'" % self.file.name)
+ assert(msg.unixfrom)
+ self.file.write(msg.unixfrom)
+ assert(msg.headers)
+ self.file.writelines(msg.headers)
+ self.file.write("\n")
+
+ # The following while loop is about twice as fast in
+ # practice to 'self.file.writelines(msg.fp.readlines())'
+ while 1:
+ body = msg.fp.read(8192)
+ if not body:
+ break
+ self.file.write(body)
+ self.count = self.count + 1
+
+ def unlink(self):
+ """destroy the whole thing"""
+ if self.file:
+ file_name = self.file.name
+ self.close()
+ vprint("unlinking file '%s'" % self.file.name)
+ os.unlink(file_name)
+
+ def get_size(self):
+ """determine file size of this mbox file"""
+ assert(self.file.name)
+ return os.path.getsize(self.file.name)
+
+ def close(self):
+ """close the mbox file"""
+ if not self.file.closed:
+ vprint("closing file '%s'" % self.file.name)
+ self.file.close()
+
+ def read_message(self):
+ """read one rfc822 message object from the mbox file"""
+ if not self.mbox:
+ self.file.seek(0)
+ self.mbox = mailbox.UnixMailbox(self.file)
+ assert(self.mbox)
+ message = self.mbox.next()
+ return message
+
+ def exclusive_lock(self):
+ """set an advisory lock on the whole mbox file"""
+ vprint("obtaining exclusive lock on file '%s'" % self.file.name)
+ fcntl.flock(self.file, fcntl.LOCK_EX)
+
+ def exclusive_unlock(self):
+ """unset any advisory lock on the mbox file"""
+ vprint("dropping exclusive lock on file '%s'" % self.file.name)
+ fcntl.flock(self.file, fcntl.LOCK_UN)
+
+ def procmail_lock(self):
+ """create a procmail-style .lock file to prevent clashes"""
+ lock_name = self.file.name + options.lockfile_extension
+ attempt = 0
+ while os.path.isfile(lock_name):
+ vprint("lockfile '%s' exists - sleeping..." % lock_name)
+ time.sleep(1)
+ attempt = attempt + 1
+ if (attempt >= options.lockfile_attempts):
+ user_error("Giving up waiting for procmail lock '%s'" % lock_name)
+ vprint("writing lockfile '%s'" % lock_name)
+ lock = open(lock_name, "w")
+ stale.procmail_lock = lock_name
+ lock.close()
+
+ def procmail_unlock(self):
+ """delete our procmail-style .lock file"""
+ lock_name = self.file.name + options.lockfile_extension
+ vprint("removing lockfile '%s'" % lock_name)
+ os.unlink(lock_name)
+ stale.procmail_lock = None
+
+ def leave_empty(self):
+ """This should be the same as 'cp /dev/null mailbox'.
+ This will leave a zero-length mailbox file so that mail
+ reading programs don't get upset that the mailbox has been
+ completely deleted."""
+ vprint("turning '%s' into a zero-length file" % self.file.name)
+ atime = os.path.getatime(self.file.name)
+ mtime = os.path.getmtime(self.file.name)
+ blank_file = open(self.file.name, "w")
+ blank_file.close()
+ os.utime(self.file.name, (atime, mtime)) # reset to original timestamps
+
+
+
+class RetainMailbox(Mailbox):
+ """a temporary mailbox for holding messages that will be retained in the
+ original mailbox"""
+ def __init__(self):
+ """constructor - create the temporary file"""
+ temp_name = tempfile.mktemp("archivemail_retain")
+ self.file = open(temp_name, "w")
+ stale.retain = temp_name
+ vprint("opened temporary retain file '%s'" % self.file.name)
+
+ def finalise(self, final_name):
+ """constructor - create the temporary file"""
+ self.close()
+
+ atime = os.path.getatime(final_name)
+ mtime = os.path.getmtime(final_name)
+
+ vprint("renaming '%s' to '%s'" % (self.file.name, final_name))
+ os.rename(self.file.name, final_name)
+
+ os.utime(final_name, (atime, mtime)) # reset to original timestamps
+ stale.retain = None
+
+ def unlink(self):
+ """Override the base-class version, removing from stalefiles"""
+ Mailbox.unlink(self)
+ stale.retain = None
+
+
+class ArchiveMailbox(Mailbox):
+ """all messages that are too old go here"""
+ final_name = None # this is
+ def __init__(self, final_name):
+ """copy any pre-existing compressed archive to a temp file which we
+ use as the new soon-to-be compressed archive"""
+ assert(final_name)
+ compressor = options.compressor
+ compressedfilename = final_name + options.compressor_extension
+
+ if os.path.isfile(final_name):
+ user_error("There is already a file named '%s'!" % (final_name))
+
+ temp_name = tempfile.mktemp("archivemail_archive")
+
+ if os.path.isfile(compressedfilename):
+ vprint("file already exists that is named: %s" % compressedfilename)
+ uncompress = "%s -d -c %s > %s" % (compressor,
+ compressedfilename, temp_name)
+ vprint("running uncompressor: %s" % uncompress)
+ stale.archive = temp_name
+ system_or_die(uncompress)
+
+ stale.archive = temp_name
+ self.file = open(temp_name, "a")
+ self.final_name = final_name
+
+ def finalise(self):
+ """rename the temp file back to the original compressed archive
+ file"""
+ self.close()
+ compressor = options.compressor
+ compressed_archive_name = self.file.name + options.compressor_extension
+ compress = compressor + " " + self.file.name
+ vprint("running compressor: '%s'" % compress)
+
+ stale.compressed_archive = compressed_archive_name
+ system_or_die(compress)
+ stale.archive = None
+
+ compressed_final_name = self.final_name + options.compressor_extension
+ vprint("renaming '%s' to '%s'" % (compressed_archive_name,
+ compressed_final_name))
+ os.rename(compressed_archive_name, compressed_final_name)
+ stale.compressed_archive = None
+
+
+class OriginalMailbox(Mailbox):
+ """This is the mailbox that we read messages from to determine if they are
+ too old. We will never write to this file directly except at the end
+ where we override the whole file with the RetainMailbox."""
+ file = None
+ def __init__(self, mailbox_name):
+ """open the mailbox, ready for reading"""
+ try:
+ self.file = open(mailbox_name, "r")
+ except IOError, msg:
+ user_error(msg)
+
+
+def main(args = sys.argv[1:]):
+ global options
+ global stale
+
+ options = Options()
+ usage = """Usage: %s [options] mailbox [mailbox...]
+Moves old mail messages in mbox-format mailboxes to compressed mailbox
+archives. This is useful for saving space and keeping your mailbox manageable.
+ Options are as follows:
+ -d, --days=<days> archive messages older than <days> days (default: %d)
+ -s, --suffix=<name> suffix for archive filename (default: '%s')
+ -z, --gzip compress the archive using gzip (default)
+ -I, --bzip2 compress the archive using bzip2
+ -Z, --compress compress the archive using compress
+ --delete delete rather than archive old mail (use with caution!)
+ -v, --verbose report lots of extra debugging information
+ -q, --quiet quiet mode - print no statistics (suitable for crontab)
+ -V, --version display version information
+ -h, --help display this message
+Example: %s linux-devel
+ This will move all messages older than %s days to a file called
+ 'linux-devel_archive.gz', deleting them from the original 'linux-devel'
+ mailbox. If the 'linux-devel_archive.gz' mailbox already exists, the
+ newly archived messages are appended.
+""" % (options.script_name, options.days_old_max, options.archive_suffix,
+ options.script_name, options.days_old_max)
+
+ check_python_version()
+
+ args = options.parse_args(args, usage)
+ if len(args) == 0:
+ print usage
+ sys.exit(1)
+
+ os.umask(077) # saves setting permissions on mailboxes/tempfiles
+ stale = StaleFiles()
+ atexit.register(clean_up)
+
+ for filename in args:
+ tempfile.tempdir = os.path.dirname(filename) # don't use /var/tmp
+ final_archive_name = filename + options.archive_suffix
+ archive_mailbox(mailbox_name = filename,
+ final_archive_name = final_archive_name)
+
+
+
+######## errors and debug ##########
+
+def vprint(string):
+ """this saves putting 'if (verbose) print foo' everywhere"""
+ if options.verbose:
+ print string
+
+
+def user_error(string):
+ """fatal error, probably something the user did wrong"""
+ script_name = options.script_name
+ message = "%s: %s\n" % (script_name, string)
+
+ sys.stderr.write(message)
+ sys.exit(1)
+
+########### operations on a message ############
+
+def is_too_old(message):
+ """return true if a message is too old (and should be archived),
+ false otherwise"""
+ date = message.getdate('Date')
+ delivery_date = message.getdate('Delivery-date')
+ use_date = None
+ time_message = None
+
+ if delivery_date:
+ try:
+ time_message = time.mktime(delivery_date)
+ use_date = delivery_date
+ vprint("using message 'Delivery-date' header")
+ except ValueError:
+ pass
+ if date and not use_date:
+ try:
+ time_message = time.mktime(date)
+ use_date = date
+ vprint("using message 'Date' header")
+ except ValueError:
+ pass
+ if not use_date:
+ print message
+ vprint("no valid dates found for message")
+ return 0
+
+ time_now = time.time()
+ if time_message > time_now:
+ time_string = time.asctime(use_date)
+ vprint("warning: message has date in the future: %s !" % time_string)
+ return 0
+
+ secs_old_max = (options.days_old_max * 24 * 60 * 60)
+ days_old = (time_now - time_message) / 24 / 60 / 60
+ vprint("message is %.2f days old" % days_old)
+
+ if ((time_message + secs_old_max) < time_now):
+ return 1
+ return 0
+
+
+############### mailbox operations ###############
+
+def archive_mailbox(mailbox_name, final_archive_name):
+ """process and archive the given mailbox name"""
+ archive = None
+ retain = None
+
+ vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name))
+ stats = Stats(mailbox_name, final_archive_name)
+
+ original = OriginalMailbox(mailbox_name)
+ if original.get_size() == 0:
+ original.close()
+ vprint("skipping '%s' because it is a zero-length file" %
+ original.file.name)
+ if not options.quiet:
+ stats.display()
+ return
+ original.procmail_lock()
+ original.exclusive_lock()
+
+ msg = original.read_message()
+ if not msg:
+ user_error("file '%s' is not in 'mbox' format" % mailbox.file.name)
+
+ while (msg):
+ stats.another_message()
+ message_id = msg.get('Message-ID')
+ vprint("processing message '%s'" % message_id)
+ if is_too_old(msg):
+ stats.another_archived()
+ if options.delete_old_mail:
+ vprint("decision: delete message")
+ else:
+ vprint("decision: archive message")
+ if (not archive):
+ archive = ArchiveMailbox(final_archive_name)
+ archive.store(msg)
+ else:
+ vprint("decision: retain message")
+ if (not retain):
+ retain = RetainMailbox()
+ retain.store(msg)
+ msg = original.read_message()
+ vprint("finished reading messages")
+
+ original.exclusive_unlock()
+ original.close()
+
+ if options.delete_old_mail:
+ # we will never have an archive file
+ if retain:
+ retain.finalise(mailbox_name)
+ else:
+ original.leave_empty()
+ elif archive:
+ archive.finalise()
+ if retain:
+ retain.finalise(mailbox_name)
+ else:
+ original.leave_empty()
+ else:
+ # There was nothing to archive
+ if retain:
+ # retain will be the same as original mailbox -- no point copying
+ retain.close()
+ retain.unlink()
+
+ original.procmail_unlock()
+ if not options.quiet:
+ stats.display()
+
+
+############### misc functions ###############
+
+def clean_up():
+ """This is run on exit to make sure we haven't left any stale
+ files/lockfiles left on the system"""
+ vprint("cleaning up ...")
+ if stale.procmail_lock:
+ vprint("removing stale procmail lock '%s'" % stale.procmail_lock)
+ try: os.unlink(stale.procmail_lock)
+ except (IOError, OSError): pass
+ if stale.retain:
+ vprint("removing stale retain file '%s'" % stale.retain)
+ try: os.unlink(stale.retain)
+ except (IOError, OSError): pass
+ if stale.archive:
+ vprint("removing stale archive file '%s'" % stale.archive)
+ try: os.unlink(stale.archive)
+ except (IOError, OSError): pass
+ if stale.compressed_archive:
+ vprint("removing stale compressed archive file '%s'" %
+ stale.compressed_archive)
+ try: os.unlink(stale.compressed_archive)
+ except (IOError, OSError): pass
+
+
+def check_python_version():
+ """make sure we are running with the right version of python"""
+ build = sys.version
+ too_old_error = "requires python v2.0 or greater. Your version is: %s" % build
+ try:
+ version = sys.version_info # we might not even have this function! :)
+ if (version[0] < 2):
+ UserError(too_old_error)
+ except: # I should be catching more specific exceptions
+ UserError(too_old_error)
+
+
+def system_or_die(command):
+ """Give a user_error() if the command we ran returned a non-zero status"""
+ rv = os.system(command)
+ if (rv != 0):
+ status = os.WEXITSTATUS(rv)
+ user_error("command '%s' returned status %d" % (command, status))
+
+
+# this is where it all happens, folks
+if __name__ == '__main__':
+ main()