Return to Snippet

Revision: 49987
at August 3, 2011 21:35 by itskkumaran


Updated Code
#!/usr/bin/python
# Author: Dr. Kumaran Kandasamy
# E-Mail: [email protected]

import urllib, urllib2, re

def main(giList, database, rettype):
	"""Batch-download records from NCBI using EPost followed by EFetch.

	The ids are first posted to epost.fcgi; the QueryKey and WebEnv values
	found in that reply are then posted to efetch.fcgi to retrieve the
	records as text.

	giList   -- ids to upload, as one comma-separated string
	            (e.g. "24475906,224465210")
	database -- value sent as the 'db' parameter (e.g. 'nucleotide')
	rettype  -- value sent as the 'rettype' parameter (e.g. 'fasta')

	Returns the EFetch reply as a list of lines (from readlines()), or the
	string "NO_DATA" when the EPost reply did not contain both a QueryKey
	and a WebEnv.  Exceptions raised by urllib2.urlopen are not caught.
	"""
	output = "NO_DATA"
	base = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

	# Step 1 (EPost): upload the id list.  Supplying a data payload makes
	# urllib2 send the request as a POST.
	data = urllib.urlencode({'db': database, 'id': giList})
	response = urllib2.urlopen(urllib2.Request(base + "epost.fcgi", data))
	try:
		epostLines = response.readlines()
	finally:
		# Always release the HTTP connection, even if reading fails.
		response.close()

	# Pull QueryKey / WebEnv out of the reply.  Each pattern is searched
	# once per line and the match object reused; if a tag shows up on
	# several lines the last occurrence wins.
	queryKey = ""
	webEnv = ""
	for line in epostLines:
		line = line.strip()
		match = re.search("<WebEnv>(.*)</WebEnv>", line)
		if match:
			webEnv = match.group(1)
		match = re.search("<QueryKey>(.*)</QueryKey>", line)
		if match:
			queryKey = match.group(1)

	if queryKey != "" and webEnv != "":
		# Progress line on stdout; formatted so the text is the same
		# "<QueryKey> <WebEnv>" whether print is a statement or a function.
		print("%s %s" % (queryKey, webEnv))

		# Step 2 (EFetch): post the history reference to fetch the records.
		values = {
			'db': database,
			'query_key': queryKey,
			'WebEnv': webEnv,
			'rettype': rettype,
			'retmode': 'text',
		}
		data = urllib.urlencode(values)
		response = urllib2.urlopen(urllib2.Request(base + "efetch.fcgi", data))
		try:
			output = response.readlines()
		finally:
			response.close()
	return output

if __name__ == "__main__":
	# Demo run: fetch four nucleotide records as FASTA.
	# The value returned by main() is not used here.
	sampleIds = ",".join(["24475906", "224465210", "50978625", "9507198"])
	main(sampleIds, 'nucleotide', 'fasta')

Revision: 49986
at August 3, 2011 21:33 by itskkumaran


Updated Code
#!/usr/bin/python
# Author: Dr. Kumaran Kandasamy
# E-Mail: [email protected]

import urllib, urllib2, re


def main(giList, database):
	"""Batch-download FASTA records from NCBI using EPost followed by EFetch.

	The ids are first posted to epost.fcgi; the QueryKey and WebEnv values
	found in that reply are then posted to efetch.fcgi with rettype 'fasta'
	and retmode 'text'.

	giList   -- ids to upload, as one comma-separated string
	            (e.g. "24475906,224465210")
	database -- value sent as the 'db' parameter (e.g. 'nucleotide')

	Returns the EFetch reply as a list of lines (from readlines()), or the
	string "NO_DATA" when the EPost reply did not contain both a QueryKey
	and a WebEnv.  Exceptions raised by urllib2.urlopen are not caught.
	"""
	output = "NO_DATA"
	base = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

	# Step 1 (EPost): upload the id list.  Supplying a data payload makes
	# urllib2 send the request as a POST.
	data = urllib.urlencode({'db': database, 'id': giList})
	response = urllib2.urlopen(urllib2.Request(base + "epost.fcgi", data))
	try:
		epostLines = response.readlines()
	finally:
		# Always release the HTTP connection, even if reading fails.
		response.close()

	# Pull QueryKey / WebEnv out of the reply, searching each pattern once
	# per line; if a tag shows up on several lines the last one wins.
	queryKey = ""
	webEnv = ""
	for line in epostLines:
		line = line.strip()
		match = re.search("<WebEnv>(.*)</WebEnv>", line)
		if match:
			webEnv = match.group(1)
		match = re.search("<QueryKey>(.*)</QueryKey>", line)
		if match:
			queryKey = match.group(1)

	if queryKey != "" and webEnv != "":
		# Progress line on stdout; formatted so the text is the same
		# "<QueryKey> <WebEnv>" whether print is a statement or a function.
		print("%s %s" % (queryKey, webEnv))

		# Step 2 (EFetch): post the history reference to fetch the records.
		values = {
			'db': database,
			'query_key': queryKey,
			'WebEnv': webEnv,
			'rettype': 'fasta',
			'retmode': 'text',
		}
		data = urllib.urlencode(values)
		response = urllib2.urlopen(urllib2.Request(base + "efetch.fcgi", data))
		try:
			output = response.readlines()
		finally:
			response.close()

	# Hand the result back to the caller; previously this function always
	# returned None, so the downloaded data was thrown away.
	return output

if __name__ == "__main__":
	# Demo run against the nucleotide database with four sample ids.
	# The value returned by main() is not used here.
	sampleIds = ",".join(["24475906", "224465210", "50978625", "9507198"])
	main(sampleIds, 'nucleotide')

Revision: 49985
at August 3, 2011 21:32 by itskkumaran


Initial Code
#!/usr/bin/python

import urllib, urllib2, re


def main(giList, database):
	"""Upload ids to NCBI with EPost, then retrieve them as FASTA with EFetch.

	giList   -- ids to upload, as one comma-separated string
	            (e.g. "24475906,224465210")
	database -- value sent as the 'db' parameter (e.g. 'nucleotide')

	Returns the EFetch reply as a list of lines (from readlines()).  If the
	EPost reply lacks a QueryKey or a WebEnv, no EFetch request is made and
	the string "NO_DATA" is returned instead.  Exceptions raised by
	urllib2.urlopen propagate to the caller.
	"""
	output = "NO_DATA"
	base = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

	# EPost: send the ids.  A request that carries data is sent as a POST.
	postData = urllib.urlencode({'db': database, 'id': giList})
	reply = urllib2.urlopen(urllib2.Request(base + "epost.fcgi", postData))
	try:
		replyLines = reply.readlines()
	finally:
		# Close the connection whether or not the read succeeded.
		reply.close()

	# Scan the reply line by line for the two history values; a later line
	# overrides an earlier one.  Each pattern is searched only once.
	queryKey = ""
	webEnv = ""
	for line in replyLines:
		line = line.strip()
		found = re.search("<WebEnv>(.*)</WebEnv>", line)
		if found:
			webEnv = found.group(1)
		found = re.search("<QueryKey>(.*)</QueryKey>", line)
		if found:
			queryKey = found.group(1)

	if queryKey != "" and webEnv != "":
		# Progress line on stdout: "<QueryKey> <WebEnv>".  Written with
		# explicit formatting so the text does not depend on whether print
		# is a statement or a function.
		print("%s %s" % (queryKey, webEnv))

		# EFetch: request the uploaded set as FASTA text.
		fetchData = urllib.urlencode({
			'db': database,
			'query_key': queryKey,
			'WebEnv': webEnv,
			'rettype': 'fasta',
			'retmode': 'text',
		})
		reply = urllib2.urlopen(urllib2.Request(base + "efetch.fcgi", fetchData))
		try:
			output = reply.readlines()
		finally:
			reply.close()

	# Return what was fetched; the function used to end with "return None",
	# which discarded the downloaded records.
	return output

if __name__ == "__main__":
	# Demo run against the nucleotide database with four sample ids.
	# The value returned by main() is not used here.
	sampleIds = ",".join(["24475906", "224465210", "50978625", "9507198"])
	main(sampleIds, 'nucleotide')

Initial URL


Initial Description
Based on Application 3 ("Retrieving large datasets") from the NCBI E-utilities sample applications: http://www.ncbi.nlm.nih.gov/books/NBK25498/#chapter3.Application_3_Retrieving_large

Initial Title
Batch Download Sequence data from NCBI using EUtils

Initial Tags
download

Initial Language
Python