Return to Snippet

Revision: 16087
at July 25, 2009 18:12 by bingjian


Updated Code
import re

from urllib import urlopen
from urllib import urlencode

"""
Author: [email protected]
Date:  2009/07/25
Example Usage:

 >>> import brightscope
 >>> o = brightscope.get_directory()
 >>> brightscope.query_score(o,'southwest')
Found Southwest Airlines Co. (3)
BrightScope Rating: 86
Found Southwest Electric Company (2)
BrightScope Rating: 55
Found Southwestern Energy Company
BrightScope Rating: 65
Found Southwest Water Company (2)
BrightScope Rating: 66
Found Southwest Gas Corporation
BrightScope Rating: 69
Found Southwestern/Great American, Inc.
BrightScope Rating: 52

"""

# Entry points on the BrightScope site.
home_url = 'http://www.brightscope.com/'
rating_base_url = 'http://www.brightscope.com/401k-rating/'
directory_url = 'http://www.brightscope.com/ratings/'

# All regexes are raw strings so that ``\s`` reaches the regex engine as-is
# instead of relying on Python passing an unknown string escape through.
# NOTE: re.MULTILINE only changes how ``^`` and ``$`` match; none of these
# patterns use those anchors, so the flag does not affect what is matched.

# Ordinary directory entry: captures the link fragment and the company name.
company_string = r'<li ><a href="/401k-rating/(?P<link>.+?)/"[\s]+>(?P<name>.+?)</a></li>'
company_pattern = re.compile(company_string, re.MULTILINE)

# Entry whose <li> carries a class starting with "top" and whose anchor has
# a title attribute; captures the same two groups.
top_company_string = r'<li class="top.+?"><a href="/401k-rating/(?P<link>.+?)/"[\s]+title=".+?"[\s]+>(?P<name>.+?)</a></li>'
top_company_pattern = re.compile(top_company_string, re.MULTILINE)

# Numeric rating shown on a company's rating page.
score_string = r'<span id="your_plan_rating">(?P<score>[0-9]+)</span>'
score_pattern = re.compile(score_string)

# HTML entities that are decoded in company names.
amp_pattern = re.compile(r'&amp;')
prime_pattern = re.compile(r'&#39;')


def get_directory():
    """Download the BrightScope ratings directory and index it by name.

    Fetches ``directory_url`` and scans the page with ``company_pattern``
    and ``top_company_pattern``.

    Returns:
        dict: company display name -> link fragment captured from the
        entry's ``/401k-rating/<link>/`` href.  ``&amp;`` and ``&#39;`` in
        names are decoded to ``&`` and ``'``.  When the same name is
        matched more than once, the last match wins.
    """
    response = urlopen(directory_url)
    try:
        doc = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()

    res = {}
    for pattern in (company_pattern, top_company_pattern):
        for match in pattern.finditer(doc):
            link = match.group('link')
            company = match.group('name')
            # sub() hands back the input unchanged when nothing matches,
            # so no search() pre-check is needed.
            company = amp_pattern.sub('&', company)
            company = prime_pattern.sub("'", company)
            res[company] = link
    return res


def query_score(directory, key):
    """Print the BrightScope rating of each company whose name contains *key*.

    Args:
        directory: mapping of company name -> link fragment, as returned
            by ``get_directory()``.
        key: substring to look for in company names; the comparison is
            case-insensitive.

    For every matching company a ``Found <name>`` line is printed, the page
    at ``rating_base_url`` + link fragment is downloaded, and if
    ``score_pattern`` matches it a ``BrightScope Rating: <n>`` line follows.
    Nothing is returned.
    """
    needle = key.lower()  # loop-invariant, so lower-case it once
    for company in directory:
        if needle not in company.lower():
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Found %s" % company)
        response = urlopen(rating_base_url + directory[company])
        try:
            doc = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        found = score_pattern.search(doc)
        if found:
            print("BrightScope Rating: %d" % int(found.group('score')))

Revision: 16086
at July 25, 2009 18:03 by bingjian


Updated Code
import re
import string

from urllib import urlopen
from urllib import urlencode

"""
Author: [email protected]
Date:  2009/07/25
Example Usage:

 >>> import brightscope
 >>> o = brightscope.get_directory()
 >>> brightscope.query_score(o,'southwest')
Found Southwest Airlines Co. (3)
BrightScope Rating: 86
Found Southwest Electric Company (2)
BrightScope Rating: 55
Found Southwestern Energy Company
BrightScope Rating: 65
Found Southwest Water Company (2)
BrightScope Rating: 66
Found Southwest Gas Corporation
BrightScope Rating: 69
Found Southwestern/Great American, Inc.
BrightScope Rating: 52

"""

# Entry points on the BrightScope site.
home_url = 'http://www.brightscope.com/'
rating_base_url = 'http://www.brightscope.com/401k-rating/'
directory_url = 'http://www.brightscope.com/ratings/'

# All regexes are raw strings so that ``\s`` reaches the regex engine as-is
# instead of relying on Python passing an unknown string escape through.

# Ordinary directory entry: captures the link fragment and the company name.
company_string = r'<li ><a href="/401k-rating/(?P<link>.+?)/"[\s]+>(?P<name>.+?)</a></li>'
company_pattern = re.compile(company_string)

# Entry whose <li> carries a class starting with "top" and whose anchor has
# a title attribute; captures the same two groups.
# NOTE: re.MULTILINE only changes how ``^`` and ``$`` match; this pattern
# uses neither anchor, so the flag does not affect what is matched.
top_company_string = r'<li class="top.+?"><a href="/401k-rating/(?P<link>.+?)/"[\s]+title=".+?"[\s]+>(?P<name>.+?)</a></li>'
top_company_pattern = re.compile(top_company_string, re.MULTILINE)

# Numeric rating shown on a company's rating page.
score_string = r'<span id="your_plan_rating">(?P<score>[0-9]+)</span>'
score_pattern = re.compile(score_string)

# HTML entities that are decoded in company names.
amp_pattern = re.compile(r'&amp;')
prime_pattern = re.compile(r'&#39;')


def get_directory():
    """Download the BrightScope ratings directory and index it by name.

    Fetches ``directory_url`` and scans the page with ``company_pattern``
    and ``top_company_pattern``.

    Returns:
        dict: company display name -> link fragment captured from the
        entry's ``/401k-rating/<link>/`` href.  ``&amp;`` and ``&#39;`` in
        names are decoded to ``&`` and ``'``.  When the same name is
        matched more than once, the last match wins.
    """
    response = urlopen(directory_url)
    try:
        doc = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()

    res = {}
    for pattern in (company_pattern, top_company_pattern):
        for match in pattern.finditer(doc):
            link = match.group('link')
            company = match.group('name')
            # sub() hands back the input unchanged when nothing matches,
            # so no search() pre-check is needed.
            company = amp_pattern.sub('&', company)
            company = prime_pattern.sub("'", company)
            res[company] = link
    return res


def query_score(directory, key):
    """Print the BrightScope rating of each company whose name contains *key*.

    Args:
        directory: mapping of company name -> link fragment, as returned
            by ``get_directory()``.
        key: substring to look for in company names; the comparison is
            case-insensitive.

    For every matching company a ``Found <name>`` line is printed, the page
    at ``rating_base_url`` + link fragment is downloaded, and if
    ``score_pattern`` matches it a ``BrightScope Rating: <n>`` line follows.
    Nothing is returned.
    """
    needle = key.lower()  # loop-invariant, so lower-case it once
    for company in directory:
        if needle not in company.lower():
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Found %s" % company)
        response = urlopen(rating_base_url + directory[company])
        try:
            doc = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        found = score_pattern.search(doc)
        if found:
            print("BrightScope Rating: %d" % int(found.group('score')))

Revision: 16085
at July 25, 2009 17:54 by bingjian


Initial Code
import re
import string

from urllib import urlopen
from urllib import urlencode


# Entry points on the BrightScope site.
home_url = 'http://www.brightscope.com/'
rating_base_url = 'http://www.brightscope.com/401k-rating/'
directory_url = 'http://www.brightscope.com/ratings/'

# All regexes are raw strings so that ``\s`` reaches the regex engine as-is
# instead of relying on Python passing an unknown string escape through.

# Ordinary directory entry: captures the link fragment and the company name.
company_string = r'<li ><a href="/401k-rating/(?P<link>.+?)/"[\s]+>(?P<name>.+?)</a></li>'
company_pattern = re.compile(company_string)

# Entry whose <li> carries a class starting with "top" and whose anchor has
# a title attribute; captures the same two groups.
# NOTE: re.MULTILINE only changes how ``^`` and ``$`` match; this pattern
# uses neither anchor, so the flag does not affect what is matched.
top_company_string = r'<li class="top.+?"><a href="/401k-rating/(?P<link>.+?)/"[\s]+title=".+?"[\s]+>(?P<name>.+?)</a></li>'
top_company_pattern = re.compile(top_company_string, re.MULTILINE)

# Numeric rating shown on a company's rating page.
score_string = r'<span id="your_plan_rating">(?P<score>[0-9]+)</span>'
score_pattern = re.compile(score_string)

# HTML entities that are decoded in company names.
amp_pattern = re.compile(r'&amp;')
prime_pattern = re.compile(r'&#39;')


def get_directory():
    """Download the BrightScope ratings directory and index it by name.

    Fetches ``directory_url`` and scans the page with ``company_pattern``
    and ``top_company_pattern``.

    Returns:
        dict: company display name -> link fragment captured from the
        entry's ``/401k-rating/<link>/`` href.  ``&amp;`` and ``&#39;`` in
        names are decoded to ``&`` and ``'``.  When the same name is
        matched more than once, the last match wins.
    """
    response = urlopen(directory_url)
    try:
        doc = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()

    res = {}
    for pattern in (company_pattern, top_company_pattern):
        for match in pattern.finditer(doc):
            link = match.group('link')
            company = match.group('name')
            # sub() hands back the input unchanged when nothing matches,
            # so no search() pre-check is needed.
            company = amp_pattern.sub('&', company)
            company = prime_pattern.sub("'", company)
            res[company] = link
    return res


def query_score(directory, key):
    """Print the BrightScope rating of each company whose name contains *key*.

    Args:
        directory: mapping of company name -> link fragment, as returned
            by ``get_directory()``.
        key: substring to look for in company names; the comparison is
            case-insensitive.

    For every matching company a ``Found <name>`` line is printed, the page
    at ``rating_base_url`` + link fragment is downloaded, and if
    ``score_pattern`` matches it a ``BrightScope Rating: <n>`` line follows.
    Nothing is returned.
    """
    needle = key.lower()  # loop-invariant, so lower-case it once
    for company in directory:
        if needle not in company.lower():
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Found %s" % company)
        response = urlopen(rating_base_url + directory[company])
        try:
            doc = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        found = score_pattern.search(doc)
        if found:
            print("BrightScope Rating: %d" % int(found.group('score')))

Initial URL
http://www.brightscope.com

Initial Description

                                

Initial Title
Look Up a Company's 401k Plan Rating from BrightScope

Initial Tags
url

Initial Language
Python