Posted By

bingjian on 07/25/09


Tagged

url RegularExpression brightscope 401k


Versions (?)

Look Up a Company's 401k Plan Rating from BrightScope


 / Published in: Python
 

URL: http://www.brightscope.com

  1. import re
  2.  
  3. from urllib import urlopen
  4. from urllib import urlencode
  5.  
  6. """
  7. Date: 2009/07/25
  8. Example Usage:
  9.  
  10. >>> import brightscope
  11. >>> o = brightscope.get_directory()
  12. >>> brightscope.query_score(o,'southwest')
  13. Found Southwest Airlines Co. (3)
  14. BrightScope Rating: 86
  15. Found Southwest Electric Company (2)
  16. BrightScope Rating: 55
  17. Found Southwestern Energy Company
  18. BrightScope Rating: 65
  19. Found Southwest Water Company (2)
  20. BrightScope Rating: 66
  21. Found Southwest Gas Corporation
  22. BrightScope Rating: 69
  23. Found Southwestern/Great American, Inc.
  24. BrightScope Rating: 52
  25.  
  26. """
  27.  
  28. home_url = 'http://www.brightscope.com/'
  29. rating_base_url = 'http://www.brightscope.com/401k-rating/'
  30. directory_url = 'http://www.brightscope.com/ratings/'
  31.  
  32. company_string = '<li ><a href="/401k-rating/(?P<link>.+?)/"[\s]+>(?P<name>.+?)</a></li>'
  33. company_pattern = re.compile(company_string,re.MULTILINE)
  34.  
  35. top_company_string = '<li class="top.+?"><a href="/401k-rating/(?P<link>.+?)/"[\s]+title=".+?"[\s]+>(?P<name>.+?)</a></li>'
  36. top_company_pattern = re.compile(top_company_string,re.MULTILINE)
  37.  
  38. score_string = '<span id="your_plan_rating">(?P<score>[0-9]+)</span>'
  39. score_pattern = re.compile(score_string)
  40.  
  41. amp_pattern = re.compile(r'&amp;')
  42. prime_pattern = re.compile(r'&#39;')
  43.  
  44.  
  45. def get_directory():
  46. doc = urlopen(directory_url).read()
  47. res = {}
  48. patterns = [company_pattern, top_company_pattern]
  49. for pattern in patterns:
  50. o = pattern.finditer(doc)
  51. for item in o:
  52. link, company = item.groups()
  53. amp = amp_pattern.search(company)
  54. if amp:
  55. company = amp_pattern.sub('&',company)
  56. prime = prime_pattern.search(company)
  57. if prime:
  58. company = prime_pattern.sub("'",company)
  59. res[company] = link
  60. return res
  61.  
  62.  
  63. def query_score(directory, key):
  64. for company in directory:
  65. if key.lower() in company.lower():
  66. print "Found %s"%company
  67. url = rating_base_url + directory[company]
  68. doc = urlopen(url).read()
  69. o = score_pattern.search(doc)
  70. if o:
  71. print "BrightScope Rating: %d"%int(o.groups()[0])

Report this snippet  

You need to log in to post a comment.