From 6b0fefc45f8d386324ed30324d073fc1e6cd4b87 Mon Sep 17 00:00:00 2001
From: Nikolaus Schulz
Date: Tue, 5 Jul 2011 23:10:27 +0200
Subject: IMAP: add support for non-ascii mailbox names

---
 archivemail      | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 test_archivemail |  21 ++++++++++
 2 files changed, 135 insertions(+), 1 deletion(-)

diff --git a/archivemail b/archivemail
index e1abbee..58d833d 100755
--- a/archivemail
+++ b/archivemail
@@ -68,11 +68,14 @@ import time
 import urlparse
 import errno
 import socket
+import locale
 
 # From_ mangling regex.
 from_re = re.compile(r'^From ', re.MULTILINE)
 imapsize_re = re.compile(r'^(?P[0-9]+) \(RFC822\.SIZE (?P[0-9]+)\)')
 
+userencoding = locale.getpreferredencoding()
+
 ############## class definitions ###############
 
 class ArchivemailException(Exception):
@@ -1410,6 +1413,107 @@ def _archive_imap(mailbox_name):
 
 
 ############### IMAP functions ###############
 
+
+# First, some IMAP modified UTF-7 support functions.
+
+# The modified BASE64 alphabet. 64 characters, each one encodes 6 bits.
+mb64alpha = string.ascii_uppercase + string.ascii_lowercase + string.digits + '+,'
+
+def isprint_ascii(char):
+    """Test for an ASCII printable character."""
+    return 0x20 <= ord(char) and ord(char) <= 0x7e
+
+def mod_utf7_encode(ustr):
+    """Encode unicode string object in modified UTF-7."""
+
+    def mb64_encode(tomb64):
+        """Encode unicode string object as a modified UTF-7 shifted sequence
+        in modified BASE64."""
+        u16be = tomb64.encode('utf_16_be')
+        mb64 = ""
+        # Process 24-bit blocks, encoding them in 6-bit steps.
+        for block in [u16be[i:i+3] for i in range(0, len(u16be), 3)]:
+            idx = 0
+            shift = 2
+            for octet in block:
+                mb64 += mb64alpha[idx | (ord(octet) >> shift)]
+                idx = (ord(octet) << (6-shift)) & 0x3f
+                shift += 2
+            mb64 += mb64alpha[idx]
+        return mb64
+
+    mu7 = ""
+    tomb64 = u""
+    for c in ustr:
+        if not isprint_ascii(c):
+            tomb64 += c
+            continue
+        if tomb64:
+            mu7 += '&' + mb64_encode(tomb64) + '-'
+            tomb64 = u""
+        if c == '&':
+            mu7 += '&-'
+        else:
+            mu7 += str(c)
+    if tomb64:
+        mu7 += '&' + mb64_encode(tomb64) + '-'
+    return mu7
+
+def mod_utf7_decode(mu7):
+    """Decode a modified UTF-7 encoded string to a unicode string object."""
+
+    def mb64_decode(mb64):
+        """Decode a modified UTF-7 shifted sequence from modified BASE64 to a
+        unicode string object."""
+        if not mb64:
+            # A null shift '&-' decodes to '&'.
+            return u"&"
+        u16be = ""
+        # Process blocks of 4 BASE64 characters, decoding each char to 6 bits.
+        for block in [mb64[i:i+4] for i in range(0, len(mb64), 4)]:
+            carrybits = mb64alpha.index(block[0]) << 2
+            shift = 4
+            for char in block[1:]:
+                bits = mb64alpha.index(char)
+                u16be += chr(carrybits | (bits >> shift))
+                carrybits = (bits << (8-shift)) & 0xff
+                shift -= 2
+            if carrybits:
+                raise ValueError("Ill-formed modified UTF-7 string: "
+                        "trailing bits in shifted sequence")
+        return u16be.decode('utf_16_be')
+
+    ustr = u""
+    mb64 = ""
+    inmb64 = False
+    for octet in mu7:
+        if not isprint_ascii(octet):
+            raise ValueError("Ill-formed modified UTF-7 string: "
+                    "contains non-printable ASCII %#04x" % ord(octet))
+        if not inmb64:
+            if octet == '&':
+                inmb64 = True
+            else:
+                ustr += octet
+            continue
+
+        if octet in mb64alpha:
+            mb64 += octet
+            continue
+
+        if octet == '-':
+            inmb64 = False
+            ustr += mb64_decode(mb64)
+            mb64 = ""
+        else:
+            break # This triggers the exception below.
+
+    if inmb64:
+        raise ValueError("Ill-formed modified UTF-7 string: "
+                "unterminated BASE64 sequence")
+    return ustr
+
+
 def imap_quote(astring):
     """Quote an IMAP `astring' string (see RFC 3501, section "Formal Syntax")."""
     if astring.startswith('"') and astring.endswith('"'):
@@ -1520,7 +1624,8 @@ def imap_smart_select(srv, mailbox):
         vprint("examining imap folder '%s' read-only" % mailbox)
     else:
         vprint("selecting imap folder '%s'" % mailbox)
-    result, response = srv.select(imap_quote(mailbox), roflag)
+    imap_mailbox = mod_utf7_encode(mailbox.decode(userencoding))
+    result, response = srv.select(imap_quote(imap_mailbox), roflag)
     if result != 'OK':
         unexpected_error("selecting '%s' failed; server says: '%s'." \
                 % (mailbox, response[0]))
@@ -1548,6 +1653,7 @@ def imap_find_mailboxes(srv, mailbox):
             vprint("Looking for mailboxes matching '%s'..." % curbox)
         else:
             vprint("Looking for mailbox '%s'..." % curbox)
+        curbox = mod_utf7_encode(curbox.decode(userencoding))
         result, response = srv.list(pattern=imap_quote(curbox))
         if result != 'OK':
             unexpected_error("LIST command failed; " \
@@ -1577,6 +1683,13 @@ def imap_find_mailboxes(srv, mailbox):
             else:
                 attrs, name = m.groups()
                 name = imap_unquote(name)
+                try:
+                    name = mod_utf7_decode(name)
+                except ValueError:
+                    vprint("Mailbox name '%s' returned by server doesn't look like "
+                            "modified UTF-7" % name)
+                    name = name.decode('utf-8')
+                name = name.encode(userencoding)
                 if '\\noselect' in attrs.lower().split():
                     vprint("skipping not selectable mailbox '%s'" % name)
                     continue
diff --git a/test_archivemail b/test_archivemail
index d3455b2..c9f217c 100755
--- a/test_archivemail
+++ b/test_archivemail
@@ -639,6 +639,27 @@ class TestIMAPQuoting(unittest.TestCase):
             self.assertEqual(unquoted, archivemail.imap_unquote(quoted))
 
 
+########## Modified UTF-7 support functions ##########
+
+class TestModUTF7(unittest.TestCase):
+    goodpairs = (
+            (u"A\N{NOT IDENTICAL TO}A.", "A&ImI-A."),
+            (u"Hi Mom -\N{WHITE SMILING FACE}-!", "Hi Mom -&Jjo--!"),
+            (u"~peter/mail/\u53f0\u5317/\u65e5\u672c\u8a9e",
+             "~peter/mail/&U,BTFw-/&ZeVnLIqe-")
+            )
+
+    def testEncode(self):
+        """Ensure that encoding text in modified UTF-7 works properly."""
+        for text, code in self.goodpairs:
+            self.assertEqual(archivemail.mod_utf7_encode(text), code)
+
+    def testDecode(self):
+        """Ensure that decoding modified UTF-7 to text works properly."""
+        for text, code in self.goodpairs:
+            self.assertEqual(archivemail.mod_utf7_decode(code), text)
+
+
 ########## acceptance testing ###########
 
 class TestArchive(TestCaseInTempdir):
-- 
cgit v1.2.3