Revision: 56720
Updated Code
at April 13, 2012 07:07 by eristoddle
Updated Code
def pin_categories():
    """Return the href of every category link on the Pinterest home page.

    Downloads "https://pinterest.com/", parses the result with
    BeautifulSoup and collects the ``href`` attribute of each element
    matched by the CSS selector ".submenu a", in the order ``select``
    yields them.
    """
    home_page = URL("https://pinterest.com/").download()
    parsed = BeautifulSoup.BeautifulSoup(home_page)
    return [link['href'] for link in select(parsed, ".submenu a")]
def crawl_pin_category(category):
    """Download one Pinterest category page and return the pin links on it.

    ``category`` is the path of the category page relative to
    "https://pinterest.com/". Leading slashes are stripped before the
    path is appended, so "all" and "/all" request the same URL.

    Only the single page at that URL is fetched (see the TODO below).
    Returns whatever ``harvest_pins`` extracts from the parsed page.
    """
    #TODO: find next pages
    # The base already ends in "/"; without stripping, an href-style value
    # such as "/all/" would produce "https://pinterest.com//all/".
    # NOTE(review): presumably the hrefs returned by pin_categories() start
    # with "/" (grab_pin joins hrefs to a base with no trailing slash) -
    # confirm against the live markup.
    page_url = "https://pinterest.com/" + category.lstrip("/")
    soup = BeautifulSoup.BeautifulSoup(URL(page_url).download())
    return harvest_pins(soup)
def harvest_pins(soup):
    """Collect the link target of every pin found in a parsed page.

    For each element matched by the CSS selector ".pin", looks up the
    ``<a>`` whose class is "PinImage ImgLink" and returns the list of
    those anchors' ``href`` values.

    NOTE(review): a pin without such an anchor is not handled here; the
    ``['href']`` lookup is applied to whatever ``find`` returns.
    """
    pin_links = []
    for pin in select(soup, ".pin"):
        anchor = pin.find("a", {"class": "PinImage ImgLink"})
        pin_links.append(anchor['href'])
    return pin_links
def grab_pin(pin_id):
    """Download a single pin page and return its metadata as a dict.

    ``pin_id`` is appended directly to "https://pinterest.com", so it is
    expected to carry its own leading "/" (presumably an href as returned
    by ``harvest_pins`` - confirm with the caller).

    Each value is the ``content`` attribute of the first ``<meta>``
    element matching the selector listed below. A selector that matches
    nothing is not handled: the ``[0]`` index is applied unconditionally.
    """
    # Result key -> CSS selector of the <meta> tag that holds its value.
    meta_selectors = (
        ("url", 'meta[property="og:url"]'),
        ("title", 'meta[property="og:title"]'),
        ("description", 'meta[property="og:description"]'),
        ("image", 'meta[property="og:image"]'),
        ("pinboard", 'meta[property="pinterestapp:pinboard"]'),
        ("pinner", 'meta[property="pinterestapp:pinner"]'),
        ("source", 'meta[property="pinterestapp:source"]'),
        ("likes", 'meta[property="pinterestapp:likes"]'),
        ("repins", 'meta[property="pinterestapp:repins"]'),
        ("comments", 'meta[property="pinterestapp:comments"]'),
        ("actions", 'meta[property="pinterestapp:actions"]'),
    )
    pin_page = URL("https://pinterest.com" + pin_id).download()
    soup = BeautifulSoup.BeautifulSoup(pin_page)
    details = {}
    for field, selector in meta_selectors:
        details[field] = select(soup, selector)[0]['content']
    return details
Revision: 56719
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at April 11, 2012 05:46 by eristoddle
Initial Code
import BeautifulSoup
from soupselect import select
def popular_pins(category):
    """Fetch the page for ``category`` and return the pin links found on it.

    ``category`` is appended as-is to "https://pinterest.com/". Only that
    one page is downloaded; the parsed document is handed to
    ``harvest_pins`` and its result is returned unchanged.
    """
    #TODO: find next pages
    category_url = "https://pinterest.com/" + category
    markup = URL(category_url).download()
    parsed = BeautifulSoup.BeautifulSoup(markup)
    return harvest_pins(parsed)
def harvest_pins(soup):
    """Return the ``href`` of the image link inside every ".pin" element.

    ``soup`` is a parsed page. Each element matched by the ".pin" selector
    is searched for an ``<a>`` with class "PinImage ImgLink", and that
    anchor's ``href`` is added to the returned list.

    NOTE(review): there is no guard for a pin that lacks this anchor.
    """
    link_attrs = {"class": "PinImage ImgLink"}

    def pin_href(pin):
        # First matching anchor inside this pin element.
        return pin.find("a", link_attrs)['href']

    hrefs = []
    for pin in select(soup, ".pin"):
        hrefs.append(pin_href(pin))
    return hrefs
def grab_pin(pin_id):
    """Scrape one pin page and return its ``<meta>`` details as a dict.

    The page URL is "https://pinterest.com" followed directly by
    ``pin_id``. Every entry of the result is the ``content`` attribute of
    the first ``<meta>`` tag matching the corresponding ``property``
    selector; a missing tag is not handled.
    """
    page_source = URL("https://pinterest.com" + pin_id).download()
    soup = BeautifulSoup.BeautifulSoup(page_source)

    def first_content(selector):
        # Content of the first element matching the selector.
        return select(soup, selector)[0]['content']

    return {
        "url": first_content('meta[property="og:url"]'),
        "title": first_content('meta[property="og:title"]'),
        "description": first_content('meta[property="og:description"]'),
        "image": first_content('meta[property="og:image"]'),
        "pinboard": first_content('meta[property="pinterestapp:pinboard"]'),
        "pinner": first_content('meta[property="pinterestapp:pinner"]'),
        "source": first_content('meta[property="pinterestapp:source"]'),
        "likes": first_content('meta[property="pinterestapp:likes"]'),
        "repins": first_content('meta[property="pinterestapp:repins"]'),
        "comments": first_content('meta[property="pinterestapp:comments"]'),
        "actions": first_content('meta[property="pinterestapp:actions"]'),
    }
Initial URL
Initial Description
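This requires: * BeautifulSoup - http://www.crummy.com/software/BeautifulSoup/ * SoupSelect - http://code.google.com/p/soupselect/ * a `URL` class with a `download()` method, which the code uses but does not import (presumably from the Pattern library's `pattern.web` module - confirm)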
Initial Title
Pinterest Scraping with Python and BeautifulSoup
Initial Tags
Initial Language
Python