Posted By

denilw on 08/20/08


Tagged

python imap


Versions (?)

Who likes this?

2 people have marked this snippet as a favorite

seenxu
gartenstuhl


IMAP Backup Script


Published in: Python
 

URL: http://the.taoofmac.com/media/Projects/imapbackup/imapbackup.py.txt

  1. #!/usr/bin/env python
  2.  
  3. """IMAP Incremental Backup Script"""
  4. __version__ = "1.2e" # Not likely to change soon
  5. __author__ = "Rui Carmo (http://the.taoofmac.com)"
  6. __copyright__ = "(C) 2006 Rui Carmo. Code under BSD License."
  7. __contributors__ = "Bob Ippolito (fix for http://python.org/sf/1092502)"
  8.  
  9. # THIS IS BETA SOFTWARE - USE AT YOUR OWN RISK, I TAKE NO RESPONSIBILITY FOR ANY DATA LOSS
  10. # See http://the.taoofmac.com/space/Projects/imapbackup.py for more information.
  11.  
  12. import getpass, os, gc, sys, time, platform, getopt
  13. import mailbox, rfc822, imaplib, socket, email
  14. import StringIO, re, csv, sha, gzip, bz2
  15.  
  16. # Progress spinner
  17. spinner_pos = 0
  18. spinner=[c.encode("utf-8") for c in unicode("|/-\\","utf-8")]
  19.  
  20. def spin(i):
  21. """Display a cheesy spinner"""
  22. global spinner_pos
  23. if sys.stdin.isatty():
  24. sys.stdout.write("\r" + spinner[spinner_pos])
  25. sys.stdout.flush()
  26. spinner_pos+=1
  27. spinner_pos%=len(spinner)
  28.  
  29. def clean_exit():
  30. sys.stdout.write("\n")
  31.  
  32. def cli_exception(type, value, tb):
  33. if not issubclass(type, KeyboardInterrupt):
  34. sys.__excepthook__(type, value, tb)
  35. else:
  36. clean_exit()
  37.  
  38. # Make sure we get a chance to clean up the display on a tty
  39. if sys.stdin.isatty():
  40. sys.excepthook=cli_exception
  41.  
  42. # Helper class for IMAP folder list parsing
  43. class mailboxlist(csv.excel):
  44. """This class is a csv dialect for parsing the IMAP folder list"""
  45. delimiter = ' '
  46.  
  47. # Hideous fix to counteract http://python.org/sf/1092502
  48. # (which should have been fixed ages ago.)
  49. def _fixed_socket_read(self, size=-1):
  50. data = self._rbuf
  51. if size < 0:
  52. # Read until EOF
  53. buffers = []
  54. if data:
  55. buffers.append(data)
  56. self._rbuf = ""
  57. if self._rbufsize <= 1:
  58. recv_size = self.default_bufsize
  59. else:
  60. recv_size = self._rbufsize
  61. while True:
  62. data = self._sock.recv(recv_size)
  63. if not data:
  64. break
  65. buffers.append(data)
  66. return "".join(buffers)
  67. else:
  68. # Read until size bytes or EOF seen, whichever comes first
  69. buf_len = len(data)
  70. if buf_len >= size:
  71. self._rbuf = data[size:]
  72. return data[:size]
  73. buffers = []
  74. if data:
  75. buffers.append(data)
  76. self._rbuf = ""
  77. while True:
  78. left = size - buf_len
  79. recv_size = min(self._rbufsize, left) # the actual fix
  80. data = self._sock.recv(recv_size)
  81. if not data:
  82. break
  83. buffers.append(data)
  84. n = len(data)
  85. if n >= left:
  86. self._rbuf = data[left:]
  87. buffers[-1] = data[:left]
  88. break
  89. buf_len += n
  90. return "".join(buffers)
  91.  
  92. # Platform detection to enable socket patch
  93. # (issue may be present in other Pythons, but of this combination I'm sure of)
  94. if('Darwin' in platform.platform() and '2.3.5' == platform.python_version()):
  95. socket._fileobject.read = _fixed_socket_read
  96.  
  97. # Regular expressions for parsing
  98. msgmatch = re.compile("^Message\-Id\: (.+)", re.IGNORECASE + re.MULTILINE)
  99. filematch = re.compile("(.+)", re.MULTILINE)
  100. blanks = re.compile(r'\s+', re.MULTILINE)
  101. msgsize = re.compile("\d+ \(RFC822.SIZE (\d+).*\)")
  102.  
  103. # Constants
  104. IMAP_PATH_SEPARATOR='/' # May be different, depending on IMAP server
  105. UUID = '19AF1258-1AAF-44EF-9D9A-731079D6FAD7' # Used to generate Message-Ids
  106.  
  107. def collectFromIMAP(server, imap_folder):
  108. """Collects Message-Ids from a given IMAP folder"""
  109. server.select(imap_folder)
  110. sys.stdout.write(" IMAP: Scanning %s" % imap_folder)
  111. # List all messages
  112. typ, data = server.search(None, 'ALL')
  113. messages = {}
  114. i = 0
  115. for num in data[0].split():
  116. # Retrieve each individual Message-Id
  117. typ, data = server.fetch(num, '(BODY[HEADER.FIELDS (MESSAGE-ID)])')
  118. header = data[0][1].strip()
  119. # remove newlines inside Message-Id (a dumb Exchange trait)
  120. header = blanks.sub(' ', header)
  121. try:
  122. id = msgmatch.match(header).group(1)
  123. if id not in messages.keys():
  124. # avoid adding dupes
  125. messages[id] = num
  126. except:
  127. # Some messages may have no Message-Id, so we'll synthesise one
  128. # (this usually happens with Sent, Drafts and .Mac news)
  129. typ, data = server.fetch(num, '(BODY[HEADER.FIELDS (FROM TO CC DATE SUBJECT)])')
  130. header = data[0][1].strip()
  131. header = header.replace('
  132. ','\t')
  133. messages['<' + UUID + '.' + sha.sha(header).hexdigest() + '>'] = num
  134. pass
  135. i = i + 1
  136. spin(i)
  137. sys.stdout.write("\n IMAP: Found %d unique messages in %s.\n" % (len(messages.keys()),imap_folder))
  138. return messages
  139.  
  140. def collectFromFile(filename, compress):
  141. """Collects Message-Ids from a given mbox file"""
  142. # Most of this code is deprecated in Python > 2.3, since PortableUnixMailbox is no more
  143. messages = {}
  144. i = 0
  145. if os.path.exists(filename):
  146. sys.stdout.write(" FILE: Scanning %s" % filename)
  147. if compress == 'gzip':
  148. handle = gzip.GzipFile(filename,'rb')
  149. elif compress == 'bzip2':
  150. handle = bz2.BZ2File(filename,'rb')
  151. else:
  152. handle = file(filename,'rb')
  153. for message in mailbox.PortableUnixMailbox(handle):
  154. header = ''
  155. # We assume all messages on disk have message-ids
  156. try:
  157. header = ''.join(message.getfirstmatchingheader('message-id'))
  158. except KeyError:
  159. # No message ID was found. Warn the user and move on
  160. sys.stdout.write("\n WARNING: Message #%d on %s does not have Message-Id header: %s." % (i, filename, str(message.getfirstmatchingheader('message-id'))))
  161. pass
  162. header = blanks.sub(' ', header.strip())
  163. try:
  164. id = msgmatch.match(header).group(1)
  165. if id not in messages.keys():
  166. # avoid adding dupes
  167. messages[id] = id
  168. except AttributeError:
  169. # Message-Id was found but could somehow not be parsed by regexp (highly bloody unlikely)
  170. sys.stdout.write("\n WARNING: Mailbox file seems not to have been generated by this program.")
  171. sys.stdout.write("\n Message-Id scanning turned up '%s'" % header)
  172. pass
  173. i = i + 1
  174. spin(i)
  175. handle.close()
  176. sys.stdout.write("\n FILE: Found %d unique messages in %s.\n" % (len(messages.keys()),filename))
  177. return messages
  178.  
  179. def updateMailbox(server, imap_folder, mailbox, messages, existing, compress, clobber):
  180. """Append messages from IMAP folder to existing mailbox"""
  181. server.select(imap_folder)
  182. # Check if server supports PEEK
  183. # (bit redundant to do it every time, I know...)
  184. fetch_command = "(RFC822.PEEK)"
  185. response = server.fetch("1:1", fetch_command)
  186. if response[0] != "OK":
  187. fetch_command = "RFC822"
  188. else:
  189. fetch_command = "RFC822.PEEK"
  190. i = 0
  191. maxlength = total = 0
  192. if clobber == True:
  193. sys.stdout.write(' COPY: Copying from %s to %s' % (imap_folder, mailbox))
  194. else:
  195. sys.stdout.write(' APPEND: Appending from %s to %s' % (imap_folder, mailbox))
  196. # Open disk file
  197. if compress == 'gzip':
  198. mbx = gzip.GzipFile(mailbox,'ab',9)
  199. elif compress == 'bzip2':
  200. mbx = bz2.BZ2File(mailbox,'wb',512*1024,9)
  201. else:
  202. mbx = file(mailbox,'ab')
  203. for id in messages.keys():
  204. # If IMAP message is not in mbox file
  205. if id not in existing.keys():
  206. # Get raw message size
  207. typ, data = server.fetch(messages[id], '(RFC822.SIZE)')
  208. length = int(msgsize.match(data[0]).group(1))
  209. maxlength = max(length, maxlength)
  210. total = total + length
  211. # This "From" and the terminating newline below delimit messages in mbox files
  212. buffer = "From nobody %s\n" % time.strftime('%a %m %d %H:%M:%S %Y')
  213. # If this is one of our synthesised Message-Ids, insert it before the other headers
  214. if UUID in id:
  215. buffer = buffer + "Message-Id: %s\n" % id
  216. mbx.write(buffer)
  217. buffer = ''
  218. typ, data = server.fetch(messages[id], fetch_command)
  219. mbx.write(data[0][1].strip().replace('\r',''))
  220. del data
  221. gc.collect()
  222. mbx.write('\n\n')
  223. i = i + 1
  224. spin(i)
  225. mbx.close()
  226. if i == 0:
  227. sys.stdout.write('\n INFO: No new messages.\n')
  228. else:
  229. sys.stdout.write('\n SUMMARY: Appended %d messages to %s\n (%d bytes, of which the largest message was %d bytes)\n' % (i, mailbox, total, maxlength))
  230.  
  231. def scanTree(server, compress, clobber):
  232. """Parse folder listing and loop over it"""
  233. # Obtain folder listing
  234. typ, data = server.list(pattern='*')
  235. # Parse folder listing as a CSV dialect (automatically removes quotes)
  236. reader = csv.reader(StringIO.StringIO('
  237. '.join(data)),dialect='mailboxlist')
  238. # Iterate over each folder
  239. for row in reader:
  240. imap_folder = row[2]
  241. # generate a pathname for the mailbox file
  242. # (we assume that folders can contain messages, so we store messages in a '.mbox' file
  243. # inside a pathname generated from the IMAP mailbox name)
  244. path = '/'.join(imap_folder.split(IMAP_PATH_SEPARATOR))
  245. filename = '.'.join(imap_folder.split(IMAP_PATH_SEPARATOR)) + '.mbox'
  246. if compress == 'gzip':
  247. filename = filename + '.gz'
  248. elif compress == 'bzip2':
  249. filename = filename + '.bz2'
  250.  
  251. existing = {}
  252. # Collect Message-Ids from each folder
  253. messages = collectFromIMAP(server, imap_folder)
  254. if os.path.exists(filename):
  255. if clobber == True:
  256. os.remove(filename)
  257. elif compress != 'bzip2':
  258. # Collect pre-existing Message-Ids from disk file
  259. existing = collectFromFile(filename, compress)
  260. # now copy messages across
  261. updateMailbox(server, imap_folder, filename, messages, existing, compress, clobber)
  262.  
  263. def main():
  264. """Main entry point"""
  265. try:
  266. opts, args = getopt.getopt(sys.argv[1:], "z:s:u:p:y", ["compress=","server=", "username=","password=","yes-i-want-to-clobber-files"])
  267. except getopt.GetoptError:
  268. print "Usage: imapbackup [OPTIONS]"
  269. print "-y --yes-i-want-to-clobber-files does not try to append, or warn about bzip2 clobbering"
  270. print "-z (gzip|bzip2) --compress=(gzip|bzip2) create/append to compressed files (EXPERIMENTAL)"
  271. print " WARNING: bzip2 does not allow for appending, existing files will be clobbered."
  272. print "-s HOSTNAME --server=HOSTNAME connect to HOSTNAME"
  273. print "-u USERNAME --username=USERNAME with USERNAME"
  274. print "-p PASSWORD --password=PASSWORD with PASSWORD (you will be prompted for one if missing)"
  275. print "\nMailbox files will be created IN THE CURRENT WORKING DIRECTORY"
  276. sys.exit(2)
  277. username = password = server = None
  278. clobber = False
  279. compress = 'plain'
  280. for option, value in opts:
  281. if option in ("-y", "--yes-i-want-to-clobber-files"):
  282. print "WARNING: All existing mailbox files will be overwritten!"
  283. clobber = True
  284. if option in ("-z", "--compress"):
  285. if value in ('gzip','bzip2'):
  286. compress = value
  287. else:
  288. print "ERROR: Invalid compression type specified."
  289. sys.exit(2)
  290. if option in ("-s", "--server"):
  291. server = value
  292. if option in ("-u", "--username"):
  293. username = value
  294. if option in ("-p", "--password"):
  295. password = value
  296. if compress == 'bzip2' and clobber == False:
  297. print "ERROR: bzip2 compression does not allow for appending."
  298. print" Please specify -y with it if you want to remove existing archives."
  299. sys.exit(2)
  300. elif compress == 'gzip' and clobber == False:
  301. print "WARNING: Appending will work, but .mbox.gz scanning is VERY slow."
  302. print " You may want to consider using uncompressed files and"
  303. print " running gzip -9 *.mbox after the backup run."
  304. if(server is None):
  305. print "ERROR: No server specified."
  306. sys.exit(2)
  307. if(username is None):
  308. print "ERROR: No username specified."
  309. sys.exit(2)
  310. if(password is None):
  311. password = getpass.getpass()
  312. server = imaplib.IMAP4(server)
  313. server.login(username, password)
  314. scanTree(server, compress, clobber)
  315. server.logout()
  316.  
  317. if __name__ == '__main__':
  318. csv.register_dialect('mailboxlist',mailboxlist)
  319. gc.enable()
  320. main()

Report this snippet  

You need to login to post a comment.