Return to Snippet

Revision: 56720
at April 13, 2012 07:07 by eristoddle


Updated Code
def pin_categories():
    """Return the href of every ``.submenu a`` link on the Pinterest home page.

    Downloads https://pinterest.com/, parses it, and collects the ``href``
    attribute of each matching anchor, in the order ``select`` yields them.
    """
    front_page = URL("https://pinterest.com/").download()
    soup = BeautifulSoup.BeautifulSoup(front_page)
    return [link['href'] for link in select(soup, ".submenu a")]

def crawl_pin_category(category):
    """Download one category page and return the pin links found on it.

    ``category`` is the path of the category page relative to the site root.
    It may be given with or without leading slashes; both forms resolve to
    the same page URL. Only the single downloaded page is harvested.

    Returns whatever ``harvest_pins`` returns for the parsed page.
    """
    #TODO: find next pages
    # grab_pin joins hrefs onto "https://pinterest.com" with no separator,
    # which implies hrefs on the site start with "/". Presumably the hrefs
    # from pin_categories do too (verify against the live markup), so strip
    # leading slashes to avoid requesting "https://pinterest.com//...".
    page_url = "https://pinterest.com/" + category.lstrip("/")
    soup = BeautifulSoup.BeautifulSoup(URL(page_url).download())
    return harvest_pins(soup)
    
def harvest_pins(soup):
    """Return the image-link href of every ``.pin`` element in ``soup``.

    For each element matched by the ``.pin`` selector, the first ``<a>`` whose
    class is "PinImage ImgLink" is looked up and its ``href`` collected.
    A pin without such an anchor makes the ``['href']`` lookup fail, because
    ``find`` has nothing to return a tag for.
    """
    hrefs = []
    for pin in select(soup, ".pin"):
        anchor = pin.find("a", {"class": "PinImage ImgLink"})
        hrefs.append(anchor['href'])
    return hrefs

def grab_pin(pin_id):
    """Download a pin page and return its ``<meta property=...>`` values.

    ``pin_id`` is appended directly to "https://pinterest.com", so it is
    expected to carry its own leading "/" (as the hrefs from ``harvest_pins``
    presumably do).

    Returns a dict whose values are the ``content`` attribute of the first
    meta tag matching each Open Graph / ``pinterestapp`` property. Indexing
    the first match fails if the page has no tag for one of the properties.
    """
    page = URL("https://pinterest.com" + pin_id).download()
    soup = BeautifulSoup.BeautifulSoup(page)

    def content_of(selector):
        # ``content`` attribute of the first tag matching the CSS selector.
        return select(soup, selector)[0]['content']

    return {
        "url": content_of('meta[property="og:url"]'),
        "title": content_of('meta[property="og:title"]'),
        "description": content_of('meta[property="og:description"]'),
        "image": content_of('meta[property="og:image"]'),
        "pinboard": content_of('meta[property="pinterestapp:pinboard"]'),
        "pinner": content_of('meta[property="pinterestapp:pinner"]'),
        "source": content_of('meta[property="pinterestapp:source"]'),
        "likes": content_of('meta[property="pinterestapp:likes"]'),
        "repins": content_of('meta[property="pinterestapp:repins"]'),
        "comments": content_of('meta[property="pinterestapp:comments"]'),
        "actions": content_of('meta[property="pinterestapp:actions"]'),
    }

Revision: 56719
at April 11, 2012 05:46 by eristoddle


Initial Code
import BeautifulSoup
from soupselect import select

def popular_pins(category):
    """Download the page for ``category`` and return the pin links on it.

    ``category`` is appended to "https://pinterest.com/" to form the page
    URL. Only that single page is harvested; the result is whatever
    ``harvest_pins`` returns for it.
    """
    #TODO: find next pages
    page_url = "https://pinterest.com/" + category
    markup = URL(page_url).download()
    return harvest_pins(BeautifulSoup.BeautifulSoup(markup))
    
def harvest_pins(soup):
    """Collect the ``href`` of the image link inside each ``.pin`` element.

    Every element matched by the ``.pin`` selector is searched for its first
    ``<a>`` with class "PinImage ImgLink"; the hrefs are returned as a list in
    match order. The ``['href']`` lookup fails for a pin that has no such
    anchor.
    """
    pin_links = []
    for pin_node in select(soup, ".pin"):
        image_link = pin_node.find("a", {"class": "PinImage ImgLink"})
        pin_links.append(image_link['href'])
    return pin_links

def grab_pin(pin_id):
    """Fetch a single pin page and extract its metadata from ``<meta>`` tags.

    ``pin_id`` is concatenated onto "https://pinterest.com" with no separator.

    Returns a dict mapping each result key to the ``content`` attribute of the
    first meta tag matched by the corresponding selector. Taking the first
    match fails when a selector matches nothing on the page.
    """
    soup = BeautifulSoup.BeautifulSoup(URL("https://pinterest.com" + pin_id).download())

    # (result key, CSS selector) pairs, looked up in this order.
    meta_selectors = (
        ("url", 'meta[property="og:url"]'),
        ("title", 'meta[property="og:title"]'),
        ("description", 'meta[property="og:description"]'),
        ("image", 'meta[property="og:image"]'),
        ("pinboard", 'meta[property="pinterestapp:pinboard"]'),
        ("pinner", 'meta[property="pinterestapp:pinner"]'),
        ("source", 'meta[property="pinterestapp:source"]'),
        ("likes", 'meta[property="pinterestapp:likes"]'),
        ("repins", 'meta[property="pinterestapp:repins"]'),
        ("comments", 'meta[property="pinterestapp:comments"]'),
        ("actions", 'meta[property="pinterestapp:actions"]'),
    )

    pin = {}
    for key, selector in meta_selectors:
        pin[key] = select(soup, selector)[0]['content']
    return pin

Initial URL

                                

Initial Description
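This code requires:
* BeautifulSoup - http://www.crummy.com/software/BeautifulSoup/
* SoupSelect - http://code.google.com/p/soupselect/
* A `URL` class providing `URL(...).download()` - the code uses it but never imports it; presumably `from pattern.web import URL` (the Pattern library)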

Initial Title
Pinterest Scraping with Python and BeautifulSoup

Initial Tags

                                

Initial Language
Python