Batch Download Sequence data from NCBI using EUtils


Published in: Python
Save to your folder(s)

Based on http://www.ncbi.nlm.nih.gov/books/NBK25498/#chapter3.Application_3_Retrieving_large


Copy this code and paste it into your Python script
  1. #!/usr/bin/python
  2. # Author: Dr. Kumaran Kandasamy
  3.  
  4. import urllib, urllib2, re
  5.  
  6. def main(giList, database, rettype):
  7. output = "NO_DATA"
  8. base = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
  9. url = base+"epost.fcgi"
  10. values = {'db' : database,
  11. 'id' : giList,
  12. }
  13.  
  14. data = urllib.urlencode(values)
  15. req = urllib2.Request(url, data)
  16. response = urllib2.urlopen(req)
  17. queryKey = ""; webEnv = "";
  18. for line in response.readlines():
  19. line = line.strip()
  20. if re.search("<WebEnv>(.*)</WebEnv>", line):
  21. webEnv = re.search("<WebEnv>(.*)</WebEnv>", line).groups()[0]
  22. if re.search("<QueryKey>(.*)</QueryKey>", line):
  23. queryKey = re.search("<QueryKey>(.*)</QueryKey>", line).groups()[0]
  24. if queryKey != "" and webEnv != "":
  25. print queryKey, webEnv
  26. url = base+"efetch.fcgi";
  27. values = {
  28. 'db':database,
  29. 'query_key':queryKey,
  30. 'WebEnv':webEnv,
  31. 'rettype':rettype,
  32. 'retmode':'text'
  33. }
  34. #post the efetch URL
  35. data = urllib.urlencode(values)
  36. req = urllib2.Request(url, data)
  37. response = urllib2.urlopen(req)
  38. output = response.readlines()
  39. return output
  40.  
  41. if __name__ == "__main__":
  42. gi = "24475906,224465210,50978625,9507198"
  43. main(gi, 'nucleotide', 'fasta')

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.