We Recommend

Learning Python
The authors of Learning Python show you enough essentials of the Python scripting language to enable you to begin solving problems right away, then reveal more powerful aspects of the language one at a time. This approach is sure to appeal to programmers and system administrators who have urgent problems and a preference for learning by semi-guided experimentation.


Posted By

denilw on 08/20/08


Tagged

python imap


Versions (?)


Who likes this?

1 person has marked this snippet as a favorite

seenxu


IMAP Backup Script


Published in: Python 


URL: http://the.taoofmac.com/media/Projects/imapbackup/imapbackup.py.txt

  1. #!/usr/bin/env python
  2.  
  3. """IMAP Incremental Backup Script"""
  4. __version__ = "1.2e" # Not likely to change soon
  5. __author__ = "Rui Carmo (http://the.taoofmac.com)"
  6. __copyright__ = "(C) 2006 Rui Carmo. Code under BSD License."
  7. __contributors__ = "Bob Ippolito (fix for http://python.org/sf/1092502)"
  8.  
  9. # THIS IS BETA SOFTWARE - USE AT YOUR OWN RISK, I TAKE NO RESPONSIBILITY FOR ANY DATA LOSS
  10. # See http://the.taoofmac.com/space/Projects/imapbackup.py for more information.
  11.  
  12. import getpass, os, gc, sys, time, platform, getopt
  13. import mailbox, rfc822, imaplib, socket, email
  14. import StringIO, re, csv, sha, gzip, bz2
  15.  
  16. # Progress spinner
  17. spinner_pos = 0
  18. spinner=[c.encode("utf-8") for c in unicode("|/-\\","utf-8")]
  19.  
  20. def spin(i):
  21. """Display a cheesy spinner"""
  22. global spinner_pos
  23. if sys.stdin.isatty():
  24. sys.stdout.write("\r" + spinner[spinner_pos])
  25. sys.stdout.flush()
  26. spinner_pos+=1
  27. spinner_pos%=len(spinner)
  28.  
  29. def clean_exit():
  30. sys.stdout.write("\n")
  31.  
  32. def cli_exception(type, value, tb):
  33. if not issubclass(type, KeyboardInterrupt):
  34. sys.__excepthook__(type, value, tb)
  35. else:
  36. clean_exit()
  37.  
  38. # Make sure we get a chance to clean up the display on a tty
  39. if sys.stdin.isatty():
  40. sys.excepthook=cli_exception
  41.  
  42. # Helper class for IMAP folder list parsing
  43. class mailboxlist(csv.excel):
  44. """This class is a csv dialect for parsing the IMAP folder list"""
  45. delimiter = ' '
  46.  
  47. # Hideous fix to counteract http://python.org/sf/1092502
  48. # (which should have been fixed ages ago.)
  49. def _fixed_socket_read(self, size=-1):
  50. data = self._rbuf
  51. if size < 0:
  52. # Read until EOF
  53. buffers = []
  54. if data:
  55. buffers.append(data)
  56. self._rbuf = ""
  57. if self._rbufsize <= 1:
  58. recv_size = self.default_bufsize
  59. else:
  60. recv_size = self._rbufsize
  61. while True:
  62. data = self._sock.recv(recv_size)
  63. if not data:
  64. break
  65. buffers.append(data)
  66. return "".join(buffers)
  67. else:
  68. # Read until size bytes or EOF seen, whichever comes first
  69. buf_len = len(data)
  70. if buf_len >= size:
  71. self._rbuf = data[size:]
  72. return data[:size]
  73. buffers = []
  74. if data:
  75. buffers.append(data)
  76. self._rbuf = ""
  77. while True:
  78. left = size - buf_len
  79. recv_size = min(self._rbufsize, left) # the actual fix
  80. data = self._sock.recv(recv_size)
  81. if not data:
  82. break
  83. buffers.append(data)
  84. n = len(data)
  85. if n >= left:
  86. self._rbuf = data[left:]
  87. buffers[-1] = data[:left]
  88. break
  89. buf_len += n
  90. return "".join(buffers)
  91.  
  92. # Platform detection to enable socket patch
  93. # (issue may be present in other Pythons, but of this combination I'm sure of)
  94. if('Darwin' in platform.platform() and '2.3.5' == platform.python_version()):
  95. socket._fileobject.read = _fixed_socket_read
  96.  
  97. # Regular expressions for parsing
  98. msgmatch = re.compile("^Message\-Id\: (.+)", re.IGNORECASE + re.MULTILINE)
  99. filematch = re.compile("(.+)", re.MULTILINE)
  100. blanks = re.compile(r'\s+', re.MULTILINE)
  101. msgsize = re.compile("\d+ \(RFC822.SIZE (\d+).*\)")
  102.  
  103. # Constants
  104. IMAP_PATH_SEPARATOR='/' # May be different, depending on IMAP server
  105. UUID = '19AF1258-1AAF-44EF-9D9A-731079D6FAD7' # Used to generate Message-Ids
  106.  
  107. def collectFromIMAP(server, imap_folder):
  108. """Collects Message-Ids from a given IMAP folder"""
  109. server.select(imap_folder)
  110. sys.stdout.write(" IMAP: Scanning %s" % imap_folder)
  111. # List all messages
  112. typ, data = server.search(None, 'ALL')
  113. messages = {}
  114. i = 0
  115. for num in data[0].split():
  116. # Retrieve each individual Message-Id
  117. typ, data = server.fetch(num, '(BODY[HEADER.FIELDS (MESSAGE-ID)])')
  118. header = data[0][1].strip()
  119. # remove newlines inside Message-Id (a dumb Exchange trait)
  120. header = blanks.sub(' ', header)
  121. try:
  122. id = msgmatch.match(header).group(1)
  123. if id not in messages.keys():
  124. # avoid adding dupes
  125. messages[id] = num
  126. except:
  127. # Some messages may have no Message-Id, so we'll synthesise one
  128. # (this usually happens with Sent, Drafts and .Mac news)
  129. typ, data = server.fetch(num, '(BODY[HEADER.FIELDS (FROM TO CC DATE SUBJECT)])')
  130. header = data[0][1].strip()
  131. header = header.replace('
  132. ','\t')
  133. messages['<' + UUID + '.' + sha.sha(header).hexdigest() + '>'] = num
  134. pass
  135. i = i + 1
  136. spin(i)
  137. sys.stdout.write("\n IMAP: Found %d unique messages in %s.\n" % (len(messages.keys()),imap_folder))
  138. return messages
  139.  
  140. def collectFromFile(filename, compress):
  141. """Collects Message-Ids from a given mbox file"""
  142. # Most of this code is deprecated in Python > 2.3, since PortableUnixMailbox is no more
  143. messages = {}
  144. i = 0
  145. if os.path.exists(filename):
  146. sys.stdout.write(" FILE: Scanning %s" % filename)
  147. if compress == 'gzip':
  148. handle = gzip.GzipFile(filename,'rb')
  149. elif compress == 'bzip2':
  150. handle = bz2.BZ2File(filename,'rb')
  151. else:
  152. handle = file(filename,'rb')
  153. for message in mailbox.PortableUnixMailbox(handle):
  154. header = ''
  155. # We assume all messages on disk have message-ids
  156. try:
  157. header = ''.join(message.getfirstmatchingheader('message-id'))
  158. except KeyError:
  159. # No message ID was found. Warn the user and move on
  160. sys.stdout.write("\n WARNING: Message #%d on %s does not have Message-Id header: %s." % (i, filename, str(message.getfirstmatchingheader('message-id'))))
  161. pass
  162. header = blanks.sub(' ', header.strip())
  163. try:
  164. id = msgmatch.match(header).group(1)
  165. if id not in messages.keys():
  166. # avoid adding dupes
  167. messages[id] = id
  168. except AttributeError:
  169. # Message-Id was found but could somehow not be parsed by regexp (highly bloody unlikely)
  170. sys.stdout.write("\n WARNING: Mailbox file seems not to have been generated by this program.")
  171. sys.stdout.write("\n Message-Id scanning turned up '%s'" % header)
  172. pass
  173. i = i + 1
  174. spin(i)
  175. handle.close()
  176. sys.stdout.write("\n FILE: Found %d unique messages in %s.\n" % (len(messages.keys()),filename))
  177. return messages
  178.  
  179. def updateMailbox(server, imap_folder, mailbox, messages, existing, compress, clobber):
  180. """Append messages from IMAP folder to existing mailbox"""
  181. server.select(imap_folder)
  182. # Check if server supports PEEK
  183. # (bit redundant to do it every time, I know...)
  184. fetch_command = "(RFC822.PEEK)"
  185. response = server.fetch("1:1", fetch_command)
  186. if response[0] != "OK":
  187. fetch_command = "RFC822"
  188. else:
  189. fetch_command = "RFC822.PEEK"
  190. i = 0
  191. maxlength = total = 0
  192. if clobber == True:
  193. sys.stdout.write(' COPY: Copying from %s to %s' % (imap_folder, mailbox))
  194. else:
  195. sys.stdout.write(' APPEND: Appending from %s to %s' % (imap_folder, mailbox))
  196. # Open disk file
  197. if compress == 'gzip':
  198. mbx = gzip.GzipFile(mailbox,'ab',9)
  199. elif compress == 'bzip2':
  200. mbx = bz2.BZ2File(mailbox,'wb',512*1024,9)
  201. else:
  202. mbx = file(mailbox,'ab')
  203. for id in messages.keys():
  204. # If IMAP message is not in mbox file
  205. if id not in existing.keys():
  206. # Get raw message size
  207. typ, data = server.fetch(messages[id], '(RFC822.SIZE)')
  208. length = int(msgsize.match(data[0]).group(1))
  209. maxlength = max(length, maxlength)
  210. total = total + length
  211. # This "From" and the terminating newline below delimit messages in mbox files
  212. buffer = "From nobody %s\n" % time.strftime('%a %m %d %H:%M:%S %Y')
  213. # If this is one of our synthesised Message-Ids, insert it before the other headers
  214. if UUID in id:
  215. buffer = buffer + "Message-Id: %s\n" % id
  216. mbx.write(buffer)
  217. buffer = ''
  218. typ, data = server.fetch(messages[id], fetch_command)
  219. mbx.write(data[0][1].strip().replace('\r',''))
  220. del data
  221. gc.collect()
  222. mbx.write('\n\n')
  223. i = i + 1
  224. spin(i)
  225. mbx.close()
  226. if i == 0:
  227. sys.stdout.write('\n INFO: No new messages.\n')
  228. else:
  229. sys.stdout.write('\n SUMMARY: Appended %d messages to %s\n (%d bytes, of which the largest message was %d bytes)\n' % (i, mailbox, total, maxlength))
  230.  
  231. def scanTree(server, compress, clobber):
  232. """Parse folder listing and loop over it"""
  233. # Obtain folder listing
  234. typ, data = server.list(pattern='*')
  235. # Parse folder listing as a CSV dialect (automatically removes quotes)
  236. reader = csv.reader(StringIO.StringIO('
  237. '.join(data)),dialect='mailboxlist')
  238. # Iterate over each folder
  239. for row in reader:
  240. imap_folder = row[2]
  241. # generate a pathname for the mailbox file
  242. # (we assume that folders can contain messages, so we store messages in a '.mbox' file
  243. # inside a pathname generated from the IMAP mailbox name)
  244. path = '/'.join(imap_folder.split(IMAP_PATH_SEPARATOR))
  245. filename = '.'.join(imap_folder.split(IMAP_PATH_SEPARATOR)) + '.mbox'
  246. if compress == 'gzip':
  247. filename = filename + '.gz'
  248. elif compress == 'bzip2':
  249. filename = filename + '.bz2'
  250.  
  251. existing = {}
  252. # Collect Message-Ids from each folder
  253. messages = collectFromIMAP(server, imap_folder)
  254. if os.path.exists(filename):
  255. if clobber == True:
  256. os.remove(filename)
  257. elif compress != 'bzip2':
  258. # Collect pre-existing Message-Ids from disk file
  259. existing = collectFromFile(filename, compress)
  260. # now copy messages across
  261. updateMailbox(server, imap_folder, filename, messages, existing, compress, clobber)
  262.  
  263. def main():
  264. """Main entry point"""
  265. try:
  266. opts, args = getopt.getopt(sys.argv[1:], "z:s:u:p:y", ["compress=","server=", "username=","password=","yes-i-want-to-clobber-files"])
  267. except getopt.GetoptError:
  268. print "Usage: imapbackup [OPTIONS]"
  269. print "-y --yes-i-want-to-clobber-files does not try to append, or warn about bzip2 clobbering"
  270. print "-z (gzip|bzip2) --compress=(gzip|bzip2) create/append to compressed files (EXPERIMENTAL)"
  271. print " WARNING: bzip2 does not allow for appending, existing files will be clobbered."
  272. print "-s HOSTNAME --server=HOSTNAME connect to HOSTNAME"
  273. print "-u USERNAME --username=USERNAME with USERNAME"
  274. print "-p PASSWORD --password=PASSWORD with PASSWORD (you will be prompted for one if missing)"
  275. print "\nMailbox files will be created IN THE CURRENT WORKING DIRECTORY"
  276. sys.exit(2)
  277. username = password = server = None
  278. clobber = False
  279. compress = 'plain'
  280. for option, value in opts:
  281. if option in ("-y", "--yes-i-want-to-clobber-files"):
  282. print "WARNING: All existing mailbox files will be overwritten!"
  283. clobber = True
  284. if option in ("-z", "--compress"):
  285. if value in ('gzip','bzip2'):
  286. compress = value
  287. else:
  288. print "ERROR: Invalid compression type specified."
  289. sys.exit(2)
  290. if option in ("-s", "--server"):
  291. server = value
  292. if option in ("-u", "--username"):
  293. username = value
  294. if option in ("-p", "--password"):
  295. password = value
  296. if compress == 'bzip2' and clobber == False:
  297. print "ERROR: bzip2 compression does not allow for appending."
  298. print" Please specify -y with it if you want to remove existing archives."
  299. sys.exit(2)
  300. elif compress == 'gzip' and clobber == False:
  301. print "WARNING: Appending will work, but .mbox.gz scanning is VERY slow."
  302. print " You may want to consider using uncompressed files and"
  303. print " running gzip -9 *.mbox after the backup run."
  304. if(server is None):
  305. print "ERROR: No server specified."
  306. sys.exit(2)
  307. if(username is None):
  308. print "ERROR: No username specified."
  309. sys.exit(2)
  310. if(password is None):
  311. password = getpass.getpass()
  312. server = imaplib.IMAP4(server)
  313. server.login(username, password)
  314. scanTree(server, compress, clobber)
  315. server.logout()
  316.  
  317. if __name__ == '__main__':
  318. csv.register_dialect('mailboxlist',mailboxlist)
  319. gc.enable()
  320. main()

Report this snippet 

You need to log in to post a comment.