Posted By

drydenlong on 05/15/13


Tagged

python


Versions (?)

Who likes this?

1 person has marked this snippet as a favorite

icecreamboyy


Email to HTML Script


 / Published in: Python
 

A script to parse emails and return HTML suited for email blast programs

  1. import email, getpass, imaplib, os, re, csv, zipfile, glob, threading
  2.  
  3. detach_dir = 'directory' # directory where to save attachments
  4. user = ("username")
  5. pwd = ("password")
  6.  
  7. # connecting to the gmail imap server
  8. m = imaplib.IMAP4_SSL("imap server")
  9. m.login(user,pwd)
  10. m.select("INBOX") # here you a can choose a mail box like INBOX instead
  11. # use m.list() to get all the mailboxes
  12.  
  13. resp, items = m.search(None, "ALL") # you could filter using the IMAP rules here (check http://www.example-code.com/csharp/imap-search-critera.asp)
  14. items = items[0].split() # getting the mails id
  15.  
  16. for emailid in items:
  17. resp, data = m.fetch(emailid, "(RFC822)") # fetching the mail, "`(RFC822)`" means "get the whole stuff", but you can ask for headers only, etc
  18. email_body = data[0][1] # getting the mail content
  19. mail = email.message_from_string(email_body) # parsing the mail content to get a mail object
  20.  
  21.  
  22. #Check if any attachments at all
  23. if mail.get_content_maintype() != 'multipart':
  24. continue
  25.  
  26. #print "["+mail["From"]+"] :" + mail["Subject"]
  27.  
  28. # we use walk to create a generator so we can iterate on the parts and forget about the recursive headache
  29. for part in mail.walk():
  30. # each part is a either non-multipart, or another multipart message
  31. # that contains further parts... Message is organized like a tree
  32. if part.get_content_type() == 'text/plain':
  33. content = part.get_payload()
  34. message = re.compile(r'\%(.+?)\%', re.DOTALL).findall(content)
  35. message = re.sub(r'=\\r\\', '', str(message))
  36. message = re.sub(r'\[\'', '', str(message))
  37. message = re.sub(r'\'\]', '', str(message))
  38. token = re.compile(r'\$(.+?)\$', re.DOTALL).findall(content)
  39. token = re.sub(r'\[\'', '', str(token))
  40. token = re.sub(r'\'\]', '', str(token))
  41. tag = re.compile(r'\^(.+?)\^', re.DOTALL).findall(content)
  42. tag = re.sub(r'\[\'', '', str(tag))
  43. tag = re.sub(r'\'\]', '', str(tag))
  44. print message
  45. print token
  46. print tag
  47. #print part.get_payload() # prints the raw text
  48. # multipart are just containers, so we skip them
  49. if part.get_content_maintype() == 'multipart':
  50. continue
  51.  
  52. # is this part an attachment ?
  53. if part.get('Content-Disposition') is None:
  54. continue
  55.  
  56. filename = part.get_filename()
  57. counter = 1
  58.  
  59. # if there is no filename, we create one with a counter to avoid duplicates
  60. if not filename:
  61. filename = 'part-%03d%s' % (counter, 'bin')
  62. counter += 1
  63.  
  64. att_path = os.path.join(detach_dir, filename)
  65.  
  66. #Check if its already there
  67. if not os.path.isfile(att_path) :
  68. # finally write the stuff
  69. fp = open(att_path, 'wb')
  70. fp.write(part.get_payload(decode=True))
  71. fp.close()
  72.  
  73. path = detach_dir
  74. os.chdir(path)
  75. image1 = str(glob.glob('upload-photo1*'))
  76. image2 = str(glob.glob('upload-photo2*'))
  77. image3 = str(glob.glob('upload-photo3*'))
  78. image1 = re.sub(r'\[\'', '', image1)
  79. image1 = re.sub(r'\'\]', '', image1)
  80. image2 = re.sub(r'\[\'', '', image2)
  81. image2 = re.sub(r'\'\]', '', image2)
  82. image3 = re.sub(r'\[\'', '', image3)
  83. image3 = re.sub(r'\'\]', '', image3)
  84. htmlFile = str(token)+'.html'
  85. #if tag == 'email_blast_demo':
  86. htmlCode = ('''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  87. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  88. <html xmlns="http://www.w3.org/1999/xhtml"><head>
  89. <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  90. <title>Untitled Document</title></head><body>
  91. <table width="554" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td>
  92. <img src="'''+image1+'''" width="554" height="186" /></td></tr><tr><td>
  93. <p style="font-family:Arial, Helvetica, sans-serif; font-size:11pt; line-height:14pt;">
  94. <br />Dear [Fld:FirstName],<br /><br />'''+str(message)+'''<br /><br /><a href="PLACEHOLDER">
  95. <img src="'''+image2+'''" width="248" height="38" alt="Opt-in for men\'s health tips now" /></a>
  96. <br /><br /><br /><img src="'''+image3+'''" width="167" height="62" align="right" /><br />
  97. <p style="font-family:Arial, Helvetica, sans-serif; font-size:10pt;"></td></tr></table>
  98. </body></html>''')
  99. htmlData = open(os.path.join('directory', htmlFile), 'w+')
  100. htmlData.write(htmlCode)
  101. print htmlFile+' Complete'
  102. htmlData.close()
  103. allFiles = [f for f in os.listdir(path) if not f.endswith('.zip')]
  104. for file in allFiles:
  105. archive = zipfile.ZipFile(token+'.zip', mode='a')
  106. archive.write(file)
  107. archive.close()
  108. os.unlink(file)
  109.  
  110.  
  111. # This script will access a set email account, parse the text and attachments of each email, create HTML markup
  112. # and zip the files together. This script assumes a set template for the HTML. I will most likely have to change
  113. # this in order to incorporate multiple templates. The HTML markup for each template will be sent in the email
  114. # and be parsed in the same fashion as the `token` and `tag` variables above.
  115. #
  116. # What still needs to be done:
  117. # 1) Archive email after being zipped so that duplicates are not created
  118. # 2) Email .zip file to requestor (person who ordered)

Report this snippet  

You need to login to post a comment.