Published in: Python
Find great websites by scraping links from delicious.com.
Expand |
Embed | Plain Text
Copy this code and paste it in your HTML
#!/usr/bin/env python # -*- coding: utf-8 -*- # (C) 2009 HalOtis Marketing # written by Matt Warren # http://halotis.com/ """ Scraper for Del.icio.us SERP. This pulls the results for a match for a query on http://del.icio.us. """ import urllib2 import re from BeautifulSoup import BeautifulSoup def get_delicious_results(query, page_limit=10): page = 1 links = [] while page < page_limit : url='http://delicious.com/search?p=' + '%20'.join(query.split()) + '&context=all&lc=1&page=' + str(page) req = urllib2.Request(url) HTML = urllib2.urlopen(req).read() soup = BeautifulSoup(HTML) next = soup.find('a', attrs={'class':re.compile('.*next$', re.I)}) #links is a list of (url, title) tuples links += [(link['href'], ''.join(link.findAll(text=True)) ) for link in soup.findAll('a', attrs={'class':re.compile('.*taggedlink.*', re.I)}) ] if next : page = page+1 else : break return links if __name__=='__main__': links = get_delicious_results('halotis marketing') print links
URL: http://www.halotis.com/2009/07/31/find-links-on-del-icio-us-with-a-python-script/