aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG8
-rw-r--r--TODO5
-rwxr-xr-xarchivemail.py155
-rwxr-xr-xsetup.py2
-rwxr-xr-xtest_archivemail.py75
5 files changed, 64 insertions, 181 deletions
diff --git a/CHANGELOG b/CHANGELOG
index b91b008..8cd7189 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,11 @@
+Version 0.3.1 - ????
+ * Stopped calling 'gzip' externally and started using the gzip library
+ so that we can append to a copy of the gzip archive directly.
+ * Removed 'bzip2' and 'compress' options since they are increasing
+ complexity without adding much, and needed to be called externally.
+ Maybe when we get a bzip2 library I will add back an option to
+ compression archives using bzip2.
+
Version 0.3.0 - 11 April 2002
* We now preserve the last-accessed and last-modified timestamps correctly
* We now preserve the correct permissions on the original mailbox instead
diff --git a/TODO b/TODO
index 16a5cbf..94bc354 100644
--- a/TODO
+++ b/TODO
@@ -1,5 +1,5 @@
-Goals for next minor release (0.3.1):
+Goals for next minor release (0.3.2):
-------------------------------------
* Finish docbook sgml documentation & man page
@@ -8,10 +8,10 @@ Goals for next major release (0.4.0):
* Lock any original .gz files
- is this necessary?
* Check for symlink attacks for tempfiles (although we don't use /var/tmp)
+* Add a lot more unit test. (see top of test_archivemail.py)
Longer Term goals:
------------------
-* Use zlib instead of calling gzip directly
* Add MMDF mailbox support
* Add Babyl mailbox support
* Add option to archive depending on mailbox size threshold
@@ -22,4 +22,3 @@ Longer Term goals:
- is this a waste of time?
* Add option - do not compress
- is this useless?
-* Use a Makefile/python installation process
diff --git a/archivemail.py b/archivemail.py
index a2c5635..8feaa1d 100755
--- a/archivemail.py
+++ b/archivemail.py
@@ -22,7 +22,7 @@ Website: http://archivemail.sourceforge.net/
"""
# global administrivia
-__version__ = "archivemail v0.3.0"
+__version__ = "archivemail v0.3.1"
__cvs_id__ = "$Id$"
__copyright__ = """Copyright (C) 2002 Paul Rodger <paul@paulrodger.com>
This is free software; see the source for copying conditions. There is NO
@@ -48,10 +48,12 @@ check_python_version() # define & run this early because 'atexit' is new
import atexit
import fcntl
import getopt
+import gzip
import mailbox
import os
import re
import rfc822
+import shutil
import signal
import stat
import string
@@ -81,8 +83,7 @@ class Stats:
assert(final_archive_name)
self.__start_time = time.time()
self.__mailbox_name = mailbox_name
- self.__archive_name = final_archive_name + \
- options.compressor_extension()
+ self.__archive_name = final_archive_name + ".gz"
def another_message(self):
"""Add one to the internal count of total messages processed"""
@@ -109,7 +110,6 @@ class Stats:
class StaleFiles:
"""Class to keep track of files to be deleted on abnormal exit"""
archive = None # tempfile for messages to be archived
- compressed_archive = None # compressed version of the above
procmail_lock = None # original_mailbox.lock
retain = None # tempfile for messages to be retained
@@ -127,17 +127,11 @@ class StaleFiles:
vprint("removing stale archive file '%s'" % self.archive)
try: os.remove(self.archive)
except (IOError, OSError): pass
- if self.compressed_archive:
- vprint("removing stale compressed archive file '%s'" %
- self.compressed_archive)
- try: os.remove(self.compressed_archive)
- except (IOError, OSError): pass
class Options:
"""Class to store runtime options, including defaults"""
archive_suffix = "_archive"
- compressor = "gzip"
days_old_max = 180
delete_old_mail = 0
dry_run = 0
@@ -162,15 +156,13 @@ class Options:
"""
try:
- opts, args = getopt.getopt(args, '?IVZd:hno:qs:vz',
- ["bzip2", "compress", "days=", "delete",
- "dry-run", "gzip", "help", "output-dir=",
- "quiet", "suffix", "verbose", "version",
- "warn-duplicate"])
+ opts, args = getopt.getopt(args, '?Vd:hno:qs:v',
+ ["days=", "delete", "dry-run", "help",
+ "output-dir=", "quiet", "suffix", "verbose",
+ "version", "warn-duplicate"])
except getopt.error, msg:
user_error(msg)
- chosen_compressor = None
for o, a in opts:
if o == '--delete':
self.delete_old_mail = 1
@@ -194,23 +186,6 @@ class Options:
if o in ('-V', '--version'):
print __version__ + "\n\n" + __copyright__
sys.exit(0)
- if o in ('-z', '--gzip'):
- if (chosen_compressor):
- user_error("conflicting compression options")
- self.compressor = "gzip"
- chosen_compressor = 1
- if o in ('-Z', '--compress'):
- if (chosen_compressor):
- user_error("conflicting compression options")
- self.compressor = "compress"
- chosen_compressor = 1
- if o in ('-I', '--bzip2'):
- if (chosen_compressor):
- user_error("conflicting compression options")
- self.compressor = "bzip2"
- chosen_compressor = 1
- if not self.compressor:
- self.compressor = "gzip"
return args
def sanity_check(self):
@@ -230,15 +205,6 @@ class Options:
if (self.days_old_max >= 10000):
user_error("argument to -d must be less than 10000")
- def compressor_extension(self):
- extensions = {
- "compress" : ".Z",
- "gzip" : ".gz",
- "bzip2" : ".bz2",
- }
- return extensions[self.compressor]
-
-
class Mbox(mailbox.PortableUnixMailbox):
"""Class that allows read/write access to a 'mbox' mailbox.
@@ -246,6 +212,8 @@ class Mbox(mailbox.PortableUnixMailbox):
"""
mbox_file = None # file handle for the mbox file
+ mbox_file_name = None # GzipFile has no .name variable
+ mbox_file_closed = 0 # GzipFile has no .closed variable
original_atime = None # last-accessed timestamp
original_mtime = None # last-modified timestamp
original_mode = None # file permissions to preserve
@@ -267,6 +235,7 @@ class Mbox(mailbox.PortableUnixMailbox):
self.mbox_file = open(path, mode)
except IOError, msg:
unexpected_error(msg)
+ self.mbox_file_name = path
mailbox.PortableUnixMailbox.__init__(self, self.mbox_file)
def write(self, msg):
@@ -281,7 +250,7 @@ class Mbox(mailbox.PortableUnixMailbox):
assert(msg)
assert(self.mbox_file)
- vprint("saving message to file '%s'" % self.mbox_file.name)
+ vprint("saving message to file '%s'" % self.mbox_file_name)
unix_from = msg.unixfrom
if not unix_from:
unix_from = make_mbox_from(msg)
@@ -300,44 +269,45 @@ class Mbox(mailbox.PortableUnixMailbox):
def remove(self):
"""Close and delete the 'mbox' mailbox file"""
- file_name = self.mbox_file.name
+ file_name = self.mbox_file_name
self.close()
- vprint("removing file '%s'" % self.mbox_file.name)
+ vprint("removing file '%s'" % self.mbox_file_name)
os.remove(file_name)
def is_empty(self):
"""Return true if the 'mbox' file is empty, false otherwise"""
- return (os.path.getsize(self.mbox_file.name) == 0)
+ return (os.path.getsize(self.mbox_file_name) == 0)
def close(self):
"""Close the mbox file"""
- if not self.mbox_file.closed:
- vprint("closing file '%s'" % self.mbox_file.name)
+ if not self.mbox_file_closed:
+ vprint("closing file '%s'" % self.mbox_file_name)
self.mbox_file.close()
+ self.mbox_file_closed = 1
def reset_stat(self):
"""Set the file timestamps and mode to the original value"""
assert(self.original_atime)
assert(self.original_mtime)
- assert(self.mbox_file.name)
+ assert(self.mbox_file_name)
assert(self.original_mode) # I doubt this will be 000?
- os.utime(self.mbox_file.name, (self.original_atime, \
+ os.utime(self.mbox_file_name, (self.original_atime, \
self.original_mtime))
- os.chmod(self.mbox_file.name, self.original_mode)
+ os.chmod(self.mbox_file_name, self.original_mode)
def exclusive_lock(self):
"""Set an advisory lock on the 'mbox' mailbox"""
- vprint("obtaining exclusive lock on file '%s'" % self.mbox_file.name)
+ vprint("obtaining exclusive lock on file '%s'" % self.mbox_file_name)
fcntl.flock(self.mbox_file, fcntl.LOCK_EX)
def exclusive_unlock(self):
"""Unset any advisory lock on the 'mbox' mailbox"""
- vprint("dropping exclusive lock on file '%s'" % self.mbox_file.name)
+ vprint("dropping exclusive lock on file '%s'" % self.mbox_file_name)
fcntl.flock(self.mbox_file, fcntl.LOCK_UN)
def procmail_lock(self):
"""Create a procmail lockfile on the 'mbox' mailbox"""
- lock_name = self.mbox_file.name + options.lockfile_extension
+ lock_name = self.mbox_file_name + options.lockfile_extension
attempt = 0
while os.path.isfile(lock_name):
vprint("lockfile '%s' exists - sleeping..." % lock_name)
@@ -355,8 +325,8 @@ class Mbox(mailbox.PortableUnixMailbox):
def procmail_unlock(self):
"""Delete the procmail lockfile on the 'mbox' mailbox"""
- assert(self.mbox_file.name)
- lock_name = self.mbox_file.name + options.lockfile_extension
+ assert(self.mbox_file_name)
+ lock_name = self.mbox_file_name + options.lockfile_extension
vprint("removing lockfile '%s'" % lock_name)
os.remove(lock_name)
_stale.procmail_lock = None
@@ -367,10 +337,10 @@ class Mbox(mailbox.PortableUnixMailbox):
This will leave a zero-length mailbox file so that mail
reading programs don't get upset that the mailbox has been
completely deleted."""
- assert(os.path.isfile(self.mbox_file.name))
- vprint("turning '%s' into a zero-length file" % self.mbox_file.name)
- mtime = os.path.getmtime(self.mbox_file.name)
- blank_file = open(self.mbox_file.name, "w")
+ assert(os.path.isfile(self.mbox_file_name))
+ vprint("turning '%s' into a zero-length file" % self.mbox_file_name)
+ mtime = os.path.getmtime(self.mbox_file_name)
+ blank_file = open(self.mbox_file_name, "w")
blank_file.close()
@@ -394,8 +364,9 @@ class RetainMbox(Mbox):
assert(final_name)
temp_name = tempfile.mktemp("archivemail_retain")
self.mbox_file = open(temp_name, "w")
+ self.mbox_file_name = temp_name
_stale.retain = temp_name
- vprint("opened temporary retain file '%s'" % self.mbox_file.name)
+ vprint("opened temporary retain file '%s'" % self.mbox_file_name)
self.__final_name = final_name
def finalise(self):
@@ -408,10 +379,10 @@ class RetainMbox(Mbox):
atime = os.path.getatime(self.__final_name)
mtime = os.path.getmtime(self.__final_name)
mode = os.stat(self.__final_name)[stat.ST_MODE]
- os.chmod(self.mbox_file.name, mode)
+ os.chmod(self.mbox_file_name, mode)
- vprint("renaming '%s' to '%s'" % (self.mbox_file.name, self.__final_name))
- os.rename(self.mbox_file.name, self.__final_name)
+ vprint("renaming '%s' to '%s'" % (self.mbox_file_name, self.__final_name))
+ os.rename(self.mbox_file_name, self.__final_name)
os.utime(self.__final_name, (atime, mtime)) # reset to original timestamps
_stale.retain = None
@@ -432,62 +403,50 @@ class ArchiveMbox(Mbox):
__final_name = None
def __init__(self, final_name):
- """Constructor -- extract any pre-existing compressed archive to a
+ """Constructor -- copy any pre-existing compressed archive to a
temporary file which we use as the new 'mbox' archive for this
mailbox.
Arguments:
final_name -- the final name for this archive mailbox. This function
will check to see if the filename already exists, and
- extract it to a temporary file if it does. It will also
+ copy it to a temporary file if it does. It will also
rename itself to this name when we call finalise()
"""
assert(final_name)
- compressor = options.compressor
- compressedfilename = final_name + options.compressor_extension()
+ compressed_filename = final_name + ".gz"
if os.path.isfile(final_name):
unexpected_error("""There is already a file named '%s'!
Have you been reading this archive? You probably should re-compress it
manually, and try running me again.""" % final_name)
- temp_name = tempfile.mktemp("archivemail_archive")
+ temp_name = tempfile.mktemp("archivemail_archive.gz")
- if os.path.isfile(compressedfilename):
- vprint("file already exists that is named: %s" % compressedfilename)
- uncompress = "%s -d -c %s > %s" % (compressor,
- compressedfilename, temp_name)
- vprint("running uncompressor: %s" % uncompress)
- _stale.archive = temp_name
- system_or_die(uncompress)
+ if os.path.isfile(compressed_filename):
+ vprint("file already exists that is named: %s" % \
+ compressed_filename)
+ shutil.copy2(compressed_filename, temp_name)
_stale.archive = temp_name
- self.mbox_file = open(temp_name, "a")
+ self.mbox_file = gzip.GzipFile(temp_name, "a")
+ self.mbox_file_name = temp_name
self.__final_name = final_name
def finalise(self):
- """Compress the archive and rename this archive temporary file to the
+ """Close the archive and rename this archive temporary file to the
final archive filename, overwriting any pre-existing archive if it
exists.
"""
assert(self.__final_name)
self.close()
- compressor = options.compressor
- compressed_archive_name = self.mbox_file.name + \
- options.compressor_extension()
- compress = compressor + " " + self.mbox_file.name
- vprint("running compressor: '%s'" % compress)
- _stale.compressed_archive = compressed_archive_name
- system_or_die(compress)
- _stale.archive = None
- compressed_final_name = self.__final_name + \
- options.compressor_extension()
- vprint("renaming '%s' to '%s'" % (compressed_archive_name,
+ compressed_final_name = self.__final_name + ".gz"
+ vprint("renaming '%s' to '%s'" % (self.mbox_file_name,
compressed_final_name))
- os.rename(compressed_archive_name, compressed_final_name)
- _stale.compressed_archive = None
+ os.rename(self.mbox_file_name, compressed_final_name)
+ _stale.archive = None
class IdentityCache:
@@ -517,7 +476,7 @@ def main(args = sys.argv[1:]):
global _stale
usage = """Usage: %s [options] mailbox [mailbox...]
-Moves old mail messages in mbox or maildir-format mailboxes to compressed
+Moves old mail messages in mbox or maildir-format mailboxes to gzipped
'mbox' mailbox archives. This is useful for saving space and keeping your
mailbox manageable.
@@ -526,9 +485,6 @@ Options are as follows:
-o, --output-dir=DIR directory where archive files go (default: current)
-s, --suffix=NAME suffix for archive filename (default: '%s')
-n, --dry-run don't write to anything - just show what would be done
- -z, --gzip compress the archive(s) using gzip (default)
- -I, --bzip2 compress the archive(s) using bzip2
- -Z, --compress compress the archive(s) using compress
--delete delete rather than archive old mail (use with caution!)
--warn-duplicate warn about duplicate Message-IDs in the same mailbox
-v, --verbose report lots of extra debugging information
@@ -934,15 +890,6 @@ def is_world_writable(path):
return (os.stat(path)[stat.ST_MODE] & stat.S_IWOTH)
-def system_or_die(command):
- """Run the command with os.system(), aborting on non-zero exit"""
- assert(command)
- rv = os.system(command)
- if (rv != 0):
- status = os.WEXITSTATUS(rv)
- unexpected_error("command '%s' returned status %d" % (command, status))
-
-
# this is where it all happens, folks
if __name__ == '__main__':
main()
diff --git a/setup.py b/setup.py
index 8452572..3ce0826 100755
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@ check_python_version() # define & run this early because 'distutils.core' is ne
from distutils.core import setup
setup(name="archivemail",
- version="0.3.0",
+ version="0.3.1",
description="archive and compress old email",
platforms="POSIX",
license="GNU GPL",
diff --git a/test_archivemail.py b/test_archivemail.py
index 22ab2f2..56ca635 100755
--- a/test_archivemail.py
+++ b/test_archivemail.py
@@ -20,9 +20,6 @@
"""
Unit-test archivemail using 'PyUnit'.
-You will need all three of 'gzip', 'bzip2' and 'compress' in your path to
-(hopefully) pass all tests.
-
TODO: add tests for:
* procmail locks already existing
* messages with corrupted date headers
@@ -208,10 +205,6 @@ class TestMboxWrite(unittest.TestCase):
class TestOptionDefaults(unittest.TestCase):
- def testCompressor(self):
- """gzip should be default compressor"""
- self.assertEqual(archivemail.options.compressor, "gzip")
-
def testVerbose(self):
"""verbose should be off by default"""
self.assertEqual(archivemail.options.verbose, 0)
@@ -318,7 +311,6 @@ class TestArchiveMboxTimestampNew(unittest.TestCase):
def testTime(self):
"""mbox timestamps should not change after no archival"""
- archivemail.options.compressor = "gzip"
archivemail.archive(self.mbox_name)
assert(os.path.exists(self.mbox_name))
new_atime = os.path.getatime(self.mbox_name)
@@ -342,7 +334,6 @@ class TestArchiveMboxTimestampMixed(unittest.TestCase):
def testTime(self):
"""mbox timestamps should not change after semi-archival"""
- archivemail.options.compressor = "gzip"
archive_name = self.mbox_name + "_archive.gz"
archivemail.archive(self.mbox_name)
assert(os.path.exists(self.mbox_name))
@@ -367,7 +358,6 @@ class TestArchiveMboxTimestampOld(unittest.TestCase):
def testTime(self):
"""mbox timestamps should not change after archival"""
- archivemail.options.compressor = "gzip"
archive_name = self.mbox_name + "_archive.gz"
archivemail.archive(self.mbox_name)
assert(os.path.exists(self.mbox_name))
@@ -393,7 +383,6 @@ class TestArchiveMboxOld(unittest.TestCase):
def testArchiveOldGzip(self):
"""archiving an old mailbox with gzip should create a valid archive"""
- archivemail.options.compressor = "gzip"
archivemail.archive(self.mbox_name)
assert(os.path.exists(self.mbox_name))
self.assertEqual(os.path.getsize(self.mbox_name), 0)
@@ -408,42 +397,9 @@ class TestArchiveMboxOld(unittest.TestCase):
assert(os.path.exists(archive_name))
assert(filecmp.cmp(archive_name, self.copy_name, shallow=0))
- def testArchiveOldBzip2(self):
- """archiving an old mailbox with bzip2 should create a valid archive"""
- archivemail.options.compressor = "bzip2"
- archivemail.archive(self.mbox_name)
- assert(os.path.exists(self.mbox_name))
- self.assertEqual(os.path.getsize(self.mbox_name), 0)
- new_mode = os.stat(self.mbox_name)[stat.ST_MODE]
- self.assertEqual(self.mbox_mode, new_mode)
-
- archive_name = self.mbox_name + "_archive.bz2"
- assert(os.path.exists(archive_name))
- os.system("bzip2 -d " + archive_name)
-
- archive_name = self.mbox_name + "_archive"
- assert(os.path.exists(archive_name))
- assert(filecmp.cmp(archive_name, self.copy_name, shallow=0))
-
- def testArchiveOldCompress(self):
- """archiving a mixed mailbox with compress should make an archive"""
- archivemail.options.compressor = "compress"
- archivemail.archive(self.mbox_name)
- assert(os.path.exists(self.mbox_name))
- self.assertEqual(os.path.getsize(self.mbox_name), 0)
- new_mode = os.stat(self.mbox_name)[stat.ST_MODE]
- self.assertEqual(self.mbox_mode, new_mode)
- archive_name = self.mbox_name + "_archive.Z"
- assert(os.path.exists(archive_name))
- os.system("compress -d " + archive_name)
- archive_name = self.mbox_name + "_archive"
- assert(os.path.exists(archive_name))
- assert(filecmp.cmp(archive_name, self.copy_name, shallow=0))
-
def tearDown(self):
archive = self.mbox_name + "_archive"
- for name in (self.mbox_name, self.copy_name, archive, \
- archive + ".gz", archive + ".bz2", archive + ".Z"):
+ for name in (self.mbox_name, self.copy_name, archive, archive + ".gz"):
if os.path.exists(name):
os.remove(name)
archivemail.options.quiet = 0
@@ -460,7 +416,6 @@ class TestArchiveMboxMixed(unittest.TestCase):
def testArchiveMixedGzip(self):
"""archiving a mixed mailbox with gzip should make an archive"""
- archivemail.options.compressor = "gzip"
archivemail.archive(self.mixed_mbox)
assert(os.path.exists(self.mixed_mbox))
assert(filecmp.cmp(self.new_mbox, self.mixed_mbox, shallow=0))
@@ -471,36 +426,10 @@ class TestArchiveMboxMixed(unittest.TestCase):
assert(os.path.exists(archive_name))
assert(filecmp.cmp(archive_name, self.old_mbox, shallow=0))
- def testArchiveMixedBzip2(self):
- """archiving a mixed mailbox with bzip2 should make an archive"""
- archivemail.options.compressor = "bzip2"
- archivemail.archive(self.mixed_mbox)
- assert(os.path.exists(self.mixed_mbox))
- assert(filecmp.cmp(self.new_mbox, self.mixed_mbox, shallow=0))
- archive_name = self.mixed_mbox + "_archive.bz2"
- assert(os.path.exists(archive_name))
- os.system("bzip2 -d " + archive_name)
- archive_name = self.mixed_mbox + "_archive"
- assert(os.path.exists(archive_name))
- assert(filecmp.cmp(archive_name, self.old_mbox, shallow=0))
-
- def testArchiveMixedCompress(self):
- """archiving a mixed mailbox with compress should make an archive"""
- archivemail.options.compressor = "compress"
- archivemail.archive(self.mixed_mbox)
- assert(os.path.exists(self.mixed_mbox))
- assert(filecmp.cmp(self.new_mbox, self.mixed_mbox, shallow=0))
- archive_name = self.mixed_mbox + "_archive.Z"
- assert(os.path.exists(archive_name))
- os.system("compress -d " + archive_name)
- archive_name = self.mixed_mbox + "_archive"
- assert(os.path.exists(archive_name))
- assert(filecmp.cmp(archive_name, self.old_mbox, shallow=0))
-
def tearDown(self):
archive = self.mixed_mbox + "_archive"
for name in (self.mixed_mbox, self.old_mbox, self.new_mbox, archive, \
- archive + ".gz", archive + ".bz2", archive + ".Z"):
+ archive + ".gz"):
if os.path.exists(name):
os.remove(name)
archivemail.options.quiet = 0