Posted By

mandric on 05/01/08

Tagged

Versions (?)

Last Edited at 05/01/08 05:20pm

Statistics

Viewed 560 times

Favorited by 0 user(s)

Related snippets

Scrape twitterlocal and save to Django model

/ Published in: Python

I'm going to use the twitterlocal data to play around a bit with pygooglechart.

Expand | Embed | Plain Text

Copy this code and paste it in your HTML

#! /usr/bin/env python
# -*- coding: iso-8859-1 -*-
# vi:ts=4:et
import os,re,sys
from BeautifulSoup import BeautifulSoup
from urllib2 import urlopen, HTTPError, URLError
import datetime
import optparse
 
 
# Path of the single on-disk file that fetch_html() uses as its page cache.
CACHE_FILE = '/tmp/twitterlocal_cache'
# Cache lifetime, in minutes, that the script passes to fetch_html().
CACHE_TIME = 10  # minutes
 
def fetch_html(url, cache_minutes=60*2, cache_file=None):
    """
    Return the HTML at ``url``, re-using an on-disk copy while it is fresh.

    url           -- address to download; 'http://' is prepended when the
                     string carries neither an http:// nor an https:// scheme.
    cache_minutes -- maximum age of the cache file, in minutes, before the
                     page is downloaded again (two hours by default).
    cache_file    -- path of the cache file; defaults to the module-level
                     CACHE_FILE.

    Returns whatever ``urlopen(url).read()`` produced, either just now or on
    the run that last wrote the cache file.  Errors raised by ``urlopen`` or
    by writing the cache file propagate to the caller.

    NOTE: the cache is a single file and is not keyed by URL, so calls with
    different URLs that share a cache file will see each other's content
    until it expires.
    """
    if cache_file is None:
        cache_file = CACHE_FILE

    # startswith() accepts a tuple: leave explicit https:// URLs untouched.
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url

    try:
        last_mod = datetime.datetime.fromtimestamp(
                    os.stat(cache_file).st_mtime)
    except OSError:
        # probably couldn't find the file: treat the cache as expired so the
        # page is downloaded exactly once below
        last_mod = None

    max_age = datetime.timedelta(minutes=cache_minutes)
    if last_mod is not None and last_mod >= datetime.datetime.now() - max_age:
        # cache is still fresh: serve it.  Binary mode keeps the bytes exactly
        # as they were downloaded.
        f = open(cache_file, 'rb')
        try:
            return f.read()
        finally:
            f.close()

    # cache missing or stale: grab url and save to cache
    html = urlopen(url).read()
    f = open(cache_file, 'wb')
    try:
        f.write(html)
    finally:
        f.close()
    return html
 
 
def _parse_stamp(date_part, time_part):
    """
    Build a datetime from a slash-separated date and a colon-separated time.

    date_part supplies month/day/year as integers; 2000 is added to the
    year, so a two-digit year is expected.  time_part supplies the two
    integers that datetime.datetime() receives as hour and minute.
    """
    month, day, year = [int(x) for x in date_part.split('/')]
    hour, minute = [int(x) for x in time_part.split(':')]
    return datetime.datetime(year + 2000, month, day, hour, minute)


if __name__ == '__main__':
    parser = optparse.OptionParser()
    parser.add_option('--settings')
    options, args = parser.parse_args()
    # call this script like
    # twitterlocal/bin/scrape.py --settings  myproject.settings
    if options.settings:
        os.environ["DJANGO_SETTINGS_MODULE"] = options.settings

    # The model import below resolves 'myproject' through sys.path, so the
    # current working directory is added to it first.
    sys.path.append(os.getcwd())
    try:
        from myproject.twitterlocal.models import TweetsStat
    except ImportError:
        # Report on stderr and exit non-zero so a calling shell or cron job
        # can tell the run failed.
        sys.stderr.write(
            'define your settings path, like --settings \'myproject.settings\'\n')
        sys.exit(1)

    html = fetch_html('http://twitterlocal.net/stats', CACHE_TIME)
    soup = BeautifulSoup(html)

    # Parse dates: the third child of the first <h3> is split on spaces.
    # Tokens 0/1 are used as the start date/time and tokens 3/4 as the end
    # date/time; token 2 is ignored (presumably a separator -- verify
    # against the live page).
    dates = [d for d in soup('h3')[0].contents[2].split(' ') if d]
    start_time = _parse_stamp(dates[0], dates[1])
    end_time = _parse_stamp(dates[3], dates[4])

    for row in soup('tbody')[0].findAll('tr'):
        cells = row.findAll('td')
        location = cells[0].contents[0]
        tweets = cells[1].contents[0]
        try:
            # A row already stored for this location and time window is
            # left untouched.
            TweetsStat.objects.get(
                    location=location,
                    start_time=start_time,
                    end_time=end_time,)
        except TweetsStat.DoesNotExist:
            t = TweetsStat(
                    location=location,
                    tweets=tweets,
                    start_time=start_time,
                    end_time=end_time,)
            t.save()

Report this snippet Tweet

Comments

Subscribe to comments

Comment:

You need to login to post a comment.