Revision: 16758
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at August 14, 2009 16:46 by halotis
Initial Code
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# (C) 2009 HalOtis Marketing
# written by Matt Warren
# http://halotis.com/
"""
Scraper for Del.icio.us SERP.
This pulls the search results that match a query on http://del.icio.us.
"""
import urllib2
import re
from BeautifulSoup import BeautifulSoup
def get_delicious_results(query, page_limit=10):
    """Scrape delicious.com search results for *query*.

    Result pages are requested one after another, starting at page 1,
    until either ``page_limit`` pages have been read or a page contains
    no anchor whose class ends in "next".

    Args:
        query: Search phrase. Runs of whitespace separate the terms.
        page_limit: Maximum number of result pages to fetch.

    Returns:
        A list of ``(url, title)`` tuples, one per anchor whose class
        contains "taggedlink", in the order they were encountered.

    Any exception raised by ``urllib2.urlopen`` is not caught here and
    propagates to the caller.
    """
    # Local import: the module header only imports urllib2, and quote()
    # lives in the Python 2 ``urllib`` module.
    from urllib import quote

    # Compile the class matchers once instead of on every page.
    next_class = re.compile('.*next$', re.I)
    tagged_class = re.compile('.*taggedlink.*', re.I)

    # Collapse whitespace, then percent-encode the whole phrase so that
    # characters such as '&', '#' or '+' cannot corrupt the query string.
    terms = ' '.join(query.split())
    if not isinstance(terms, str):
        # Python 2 unicode input: quote() needs a byte string.
        terms = terms.encode('utf-8')
    encoded_query = quote(terms, safe='')

    links = []  # list of (url, title) tuples
    page = 1
    # "<=" so that page_limit is the number of pages actually fetched
    # (the previous "<" stopped one page short).
    while page <= page_limit:
        url = ('http://delicious.com/search?p=' + encoded_query +
               '&context=all&lc=1&page=' + str(page))
        response = urllib2.urlopen(urllib2.Request(url))
        try:
            html = response.read()
        finally:
            # Release the connection even if read() fails.
            response.close()

        soup = BeautifulSoup(html)
        links += [
            (anchor['href'], ''.join(anchor.findAll(text=True)))
            for anchor in soup.findAll('a', attrs={'class': tagged_class})
        ]

        # No "next" anchor means this was the last page of results.
        next_link = soup.find('a', attrs={'class': next_class})
        if not next_link:
            break
        page += 1
    return links
if __name__ == '__main__':
    # Demo run when executed as a script: search for a sample phrase and
    # dump the resulting list of (url, title) tuples to stdout.
    links = get_delicious_results('halotis marketing')
    print(links)
Initial URL
http://www.halotis.com/2009/07/31/find-links-on-del-icio-us-with-a-python-script/
Initial Description
find great websites by scraping links from delicious.com
Initial Title
Get Del.icio.us links from a search
Initial Tags
links, web
Initial Language
Python