From 1db28f2b0460f0a063497febfb0897a3dc74dd2a Mon Sep 17 00:00:00 2001 From: Nikolaus Schulz Date: Mon, 28 Dec 2009 23:13:19 +0100 Subject: Refactoring of the mbox classes The RetainMbox and ArchiveMbox classes are now gone, mainly because their finalise() methods were messing with the archived mbox and the archive, respectively, which was not good OO design. The core functionality of the finalise() methods of both removed classes is moved to the objects that are manipulated: the Mbox class representing the mbox that is being archived gains a new method overwrite_with(), and there is a new class ArchiveMbox that represents the actual archive, which has an append() method (yes, unfortunately the new class has the same name like the removed class). The RetainMbox instance is replaced with a TempMbox, and the ArchiveMbox instance either with a TempMbox, or a CompressedTempMbox if archive compression is enabled. Finally, a compressed TempMbox is now a implemented as a subclass of TempMbox, named CompressedMbox. Cooperation with the StaleFiles class moves into the TempMbox class. This means slightly less detailed verbose cleanup reporting, oh well. --- archivemail.py | 215 ++++++++++++++++++++++----------------------------------- 1 file changed, 82 insertions(+), 133 deletions(-) diff --git a/archivemail.py b/archivemail.py index 45b160d..fbbbc28 100755 --- a/archivemail.py +++ b/archivemail.py @@ -137,9 +137,8 @@ class Stats: class StaleFiles: """Class to keep track of files to be deleted on abnormal exit""" - archive = None # tempfile for messages to be archived dotlock_lock = None # original_mailbox.lock - retain = None # tempfile for messages to be retained + temp_mboxes = [] # temporary retain and archive mboxes temp_dir = None # our tempfile directory container def clean(self): @@ -150,17 +149,11 @@ class StaleFiles: os.remove(self.dotlock_lock) self.dotlock_lock = None except (IOError, OSError): pass - if self.retain: - vprint("removing stale retain file '%s'" % self.retain) + while self.temp_mboxes: + mbox = self.temp_mboxes.pop() + vprint("removing stale temporary mbox '%s'" % mbox) try: - os.remove(self.retain) - self.retain = None - except (IOError, OSError): pass - if self.archive: - vprint("removing stale archive file '%s'" % self.archive) - try: - os.remove(self.archive) - self.archive = None + os.remove(mbox) except (IOError, OSError): pass if self.temp_dir: vprint("removing stale tempfile directory '%s'" % self.temp_dir) @@ -318,9 +311,8 @@ class Options: class Mbox(mailbox.UnixMailbox): - """A mostly-read-only mbox with locking. Subclasses the mailbox.UnixMailbox - class. - """ + """A mostly-read-only mbox with locking. The mbox content can only be + modified by overwriting the entire underlying file.""" def __init__(self, path): """Constructor for opening an existing 'mbox' mailbox. @@ -450,6 +442,31 @@ class Mbox(mailbox.UnixMailbox): """Return the current size of the mbox file""" return os.path.getsize(self.mbox_file_name) + def overwrite_with(self, mbox_filename): + """Overwrite the mbox content with the content of the given mbox file.""" + fin = open(mbox_filename, "r") + self.mbox_file.seek(0) + shutil.copyfileobj(fin, self.mbox_file) + self.mbox_file.truncate() + + +class ArchiveMbox: + """Simple append-only access to the archive mbox. Entirely content-agnostic.""" + + def __init__(self, path): + fd = safe_open(path) + self.mbox_file = os.fdopen(fd, "a") + + def append(self, filename): + """Append the content of the given file to the mbox.""" + fin = open(filename, "r") + shutil.copyfileobj(fin, self.mbox_file) + fin.close() + + def close(self): + """Close the mbox file.""" + self.mbox_file.close() + class TempMbox: """An write-only temporary mbox. No locking methods.""" @@ -458,6 +475,7 @@ class TempMbox: """Creates a temporary mbox file.""" fd, filename = tempfile.mkstemp(prefix=prefix) self.mbox_file_name = filename + _stale.temp_mboxes.append(filename) self.mbox_file = os.fdopen(fd, "w") # an empty gzip file is not really empty (it contains the gzip header # and trailer), so we need to track manually if this mbox is empty @@ -512,110 +530,22 @@ class TempMbox: self.mbox_file.close() def remove(self): - """Close and delete the 'mbox' mailbox file""" - file_name = self.mbox_file_name - self.close() - vprint("removing file '%s'" % self.mbox_file_name) - os.remove(file_name) - - -class RetainMbox(TempMbox): - """Class for holding messages that will be retained from the original - mailbox (ie. the messages are not considered 'old'). Extends the 'Mbox' - class. This 'mbox' file starts off as a temporary file but will eventually - overwrite the original mailbox if everything is OK. - - """ - - def __init__(self, final_mbox_file): - """Constructor - create a temporary file for the mailbox. - - Arguments: - final_mbox_file -- file object of the original mailbox that this mailbox - will replace when we call finalise() - - """ - assert(final_mbox_file) - TempMbox.__init__(self, prefix="retain") - _stale.retain = self.mbox_file_name - vprint("opened temporary retain file '%s'" % self.mbox_file_name) - self.__final_mbox_file = final_mbox_file - - def finalise(self): - """Overwrite the original mailbox with this temporary mailbox.""" - assert(self.__final_mbox_file) - new_size = self.mbox_file.tell() - old_size = self.__final_mbox_file.tell() - if new_size == old_size: - vprint("no pending changes to mbox '%s'" % \ - self.__final_mbox_file.name) - else: - self.close() - self.mbox_file = open(self.mbox_file_name, "r") - vprint("overwriting mbox '%s' with temporary mbox '%s'" % \ - (self.__final_mbox_file.name, self.mbox_file_name)) - self.__final_mbox_file.seek(0) - shutil.copyfileobj(self.mbox_file, self.__final_mbox_file) - self.__final_mbox_file.truncate() - self.remove() - - def remove(self): - """Close and delete this temporary mailbox.""" - TempMbox.remove(self) - _stale.retain = None - + """Delete the temporary mbox file.""" + os.remove(self.mbox_file_name) + _stale.temp_mboxes.remove(self.mbox_file_name) -class ArchiveMbox(TempMbox): - """Class for holding messages that will be archived from the original - mailbox (ie. the messages that are considered 'old'). Extends the 'Mbox' - class. This 'mbox' file starts off as a temporary file, and will eventually - be appended to the original archive mailbox if everything is OK. - - """ - __final_name = None +class CompressedTempMbox(TempMbox): + """A compressed version of a TempMbox.""" - def __init__(self, final_name): - """Constructor -- create a temporary archive. - - Arguments: - final_name -- the final name for this archive mailbox. The temporary - archive will be appended to this file when we call - finalise() - """ - assert(final_name) - if options.no_compress: - TempMbox.__init__(self, prefix="archive") - else: - TempMbox.__init__(self, prefix="archive.gz") - self.mbox_file.close() - self.mbox_file = gzip.GzipFile(self.mbox_file_name, "w") - _stale.archive = self.mbox_file_name - self.__final_name = final_name - - def finalise(self): - """Append the temporary archive to the final archive, and delete it - afterwards.""" - assert(self.__final_name) - self.close() - if not self.empty: - self.mbox_file = open(self.mbox_file_name, "r") - final_name = self.__final_name - if not options.no_compress: - final_name = final_name + ".gz" - vprint("writing back '%s' to '%s'" % (self.mbox_file_name, final_name)) - fd = safe_open(final_name) - final_archive = os.fdopen(fd, "a") - shutil.copyfileobj(self.mbox_file, final_archive) - final_archive.close() - self.remove() + def __init__(self, prefix=tempfile.template): + TempMbox.__init__(self, prefix) + self.raw_file = self.mbox_file + self.mbox_file = gzip.GzipFile(mode="a", fileobj=self.mbox_file) - def remove(self): - """Delete the 'mbox' mailbox file""" - # Can't call TempMbox.remove here, because it would close the mbox - # a second time. - vprint("removing file '%s'" % self.mbox_file_name) - os.remove(self.mbox_file_name) - _stale.archive = None + def close(self): + """Finish gzip file and close it.""" + self.mbox_file.close() # close GzipFile, writing gzip trailer + self.raw_file.close() class IdentityCache: @@ -1166,11 +1096,8 @@ def _archive_mbox(mailbox_name, final_archive_name): if options.dry_run or options.copy_old_mail: retain = None else: - retain = RetainMbox(original.mbox_file) - if options.dry_run or options.delete_old_mail: - archive = None - else: - archive = ArchiveMbox(final_archive_name) + retain = TempMbox(prefix="retain") + archive = prepare_temp_archive() original.lock() msg = original.next() @@ -1199,10 +1126,17 @@ def _archive_mbox(mailbox_name, final_archive_name): if original.starting_size != original.get_size(): unexpected_error("the mailbox '%s' changed size during reading!" % \ mailbox_name) - if archive: - archive.finalise() + commit_archive(archive, final_archive_name) if retain: - retain.finalise() + pending_changes = original.mbox_file.tell() != retain.mbox_file.tell() + retain.close() + if pending_changes: + vprint("overwriting mbox '%s' with temporary mbox '%s'" % \ + (original.mbox_file_name, retain.mbox_file_name)) + original.overwrite_with(retain.mbox_file_name) + else: + vprint("no changes to mbox '%s'" % original.mbox_file_name) + retain.remove() original.unlock() original.close() original.reset_timestamps() @@ -1224,12 +1158,8 @@ def _archive_dir(mailbox_name, final_archive_name, type): original = mailbox.MHMailbox(mailbox_name) else: unexpected_error("unknown type: %s" % type) - cache = IdentityCache(mailbox_name) - if options.dry_run or options.delete_old_mail: - archive = None - else: - archive = ArchiveMbox(final_archive_name) + archive = prepare_temp_archive() for msg in original: if not msg: @@ -1255,8 +1185,7 @@ def _archive_dir(mailbox_name, final_archive_name, type): else: vprint("decision: retain message") vprint("finished reading messages") - if archive: - archive.finalise() + commit_archive(archive, final_archive_name) for file_name in delete_queue: vprint("removing original message: '%s'" % file_name) try: os.remove(file_name) @@ -1349,7 +1278,7 @@ def _archive_imap(mailbox_name, final_archive_name): if not options.dry_run: if not options.delete_old_mail: - archive = ArchiveMbox(final_archive_name) + archive = prepare_temp_archive() vprint("fetching messages...") for msn in message_list: # Fetching message flags and body together always finds \Seen @@ -1368,7 +1297,7 @@ def _archive_imap(mailbox_name, final_archive_name): if options.warn_duplicates: cache.warn_if_dupe(msg) archive.write(msg) - archive.finalise() + commit_archive(archive, final_archive_name) if not options.copy_old_mail: vprint("Deleting %s messages" % len(message_list)) # do not delete more than a certain number of messages at a time, @@ -1590,6 +1519,26 @@ def clean_up_signal(signal_number, stack_frame): # at this stage unexpected_error("received signal %s" % signal_number) +def prepare_temp_archive(): + """Create temporary archive mbox.""" + if options.dry_run or options.delete_old_mail: + return None + if options.no_compress: + return TempMbox() + else: + return CompressedTempMbox() + +def commit_archive(archive, final_archive_name): + """Finalize temporary archive and write it to its final destination.""" + if not options.no_compress: + final_archive_name = final_archive_name + '.gz' + if archive: + archive.close() + if not archive.empty: + final_archive = ArchiveMbox(final_archive_name) + final_archive.append(archive.mbox_file_name) + final_archive.close() + archive.remove() def make_archive_name(mailbox_name): """Derive archive name and (relative) path from the mailbox name.""" -- cgit v1.2.3