Return to Snippet

Revision: 16758
at August 14, 2009 16:46 by halotis


Initial Code
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# (C) 2009 HalOtis Marketing
# written by Matt Warren
# http://halotis.com/
 
"""
Scraper for Del.icio.us SERP.
 
This pulls the search results that match a query on http://del.icio.us.
"""
 
import re
import urllib
import urllib2
 
from BeautifulSoup import BeautifulSoup
 
def get_delicious_results(query, page_limit=10):
    """Scrape Delicious search results for ``query``.

    Requests the search result pages one at a time, starting at page 1,
    and collects every anchor whose ``class`` attribute matches
    ``taggedlink``. Paging stops as soon as a page has no anchor whose
    class ends in ``next``, or once ``page_limit`` pages have been fetched.

    Args:
        query: Search terms. Runs of whitespace are collapsed to a single
            space before the terms are percent-encoded into the URL.
        page_limit: Maximum number of result pages to request.

    Returns:
        A list of ``(url, title)`` tuples in the order they were found,
        where ``title`` is the concatenated text content of the anchor.

    Errors raised by ``urllib2.urlopen`` or by reading the response are
    not caught here and propagate to the caller.
    """
    # Compile the class matchers once instead of on every page.
    next_link_class = re.compile('.*next$', re.I)
    result_link_class = re.compile('.*taggedlink.*', re.I)

    # Percent-encode the whole query (safe='' also escapes '/') so that
    # characters such as '&', '#' or '+' cannot corrupt the query string.
    terms = urllib.quote(' '.join(query.split()), safe='')

    links = []

    # Pages are 1-based; the upper bound is inclusive so that exactly
    # ``page_limit`` pages are requested at most.
    for page in range(1, page_limit + 1):
        url = ('http://delicious.com/search?p=' + terms +
               '&context=all&lc=1&page=' + str(page))
        response = urllib2.urlopen(urllib2.Request(url))
        try:
            html = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        soup = BeautifulSoup(html)

        # links is a list of (url, title) tuples
        links += [(link['href'], ''.join(link.findAll(text=True)))
                  for link in soup.findAll('a', attrs={'class': result_link_class})]

        # No "next" anchor on the page means there is nothing more to fetch.
        if not soup.find('a', attrs={'class': next_link_class}):
            break

    return links
 
if __name__ == '__main__':
    # Demo run: search for a sample phrase and dump the scraped
    # (url, title) tuples to stdout.
    found = get_delicious_results('halotis marketing')
    print(found)

Initial URL
http://www.halotis.com/2009/07/31/find-links-on-del-icio-us-with-a-python-script/

Initial Description
Find great websites by scraping links from delicious.com.

Initial Title
Get Del.icio.us links from a search

Initial Tags
links, web

Initial Language
Python