Posted By

bingjian on 07/25/09


Tagged

url RegularExpression brightscope 401k


Versions (?)

Look Up a Company's 401k Plan Rating from BrightScope


 / Published in: Python
 

URL: http://www.brightscope.com

  1. import re
  2.  
  3. from urllib import urlopen
  4. from urllib import urlencode
  5.  
  6. """
  7. Author: (e-mail address obscured in this copy of the snippet)
  8. Date: 2009/07/25
  9. Example Usage:
  10.  
  11. >>> import brightscope
  12. >>> o = brightscope.get_directory()
  13. >>> brightscope.query_score(o,'southwest')
  14. Found Southwest Airlines Co. (3)
  15. BrightScope Rating: 86
  16. Found Southwest Electric Company (2)
  17. BrightScope Rating: 55
  18. Found Southwestern Energy Company
  19. BrightScope Rating: 65
  20. Found Southwest Water Company (2)
  21. BrightScope Rating: 66
  22. Found Southwest Gas Corporation
  23. BrightScope Rating: 69
  24. Found Southwestern/Great American, Inc.
  25. BrightScope Rating: 52
  26.  
  27. """
  28.  
  # Site root and the two sections of it that this module reads.
  home_url = 'http://www.brightscope.com/'
  rating_base_url = home_url + '401k-rating/'
  directory_url = home_url + 'ratings/'

  # NOTE: every regex below is a raw string so that the backslash in [\s]
  # reaches the regex engine as-is instead of being treated as a (invalid)
  # string escape.  re.MULTILINE is kept from the original; the patterns
  # contain no ^ or $ anchors, so it does not change what they match.

  # Plain directory entry:
  #   <li ><a href="/401k-rating/<link>/" >Company name</a></li>
  company_string = r'<li ><a href="/401k-rating/(?P<link>.+?)/"[\s]+>(?P<name>.+?)</a></li>'
  company_pattern = re.compile(company_string, re.MULTILINE)

  # Entry whose <li> carries a class starting with "top" and whose anchor
  # has an extra title attribute.
  top_company_string = r'<li class="top.+?"><a href="/401k-rating/(?P<link>.+?)/"[\s]+title=".+?"[\s]+>(?P<name>.+?)</a></li>'
  top_company_pattern = re.compile(top_company_string, re.MULTILINE)

  # The numeric rating shown on a company's rating page.
  score_string = r'<span id="your_plan_rating">(?P<score>[0-9]+)</span>'
  score_pattern = re.compile(score_string)

  # The two HTML entities that get_directory() decodes in company names.
  amp_pattern = re.compile(r'&amp;')
  prime_pattern = re.compile(r'&#39;')
  44.  
  45.  
  def get_directory():
      """Download the ratings directory page and index the companies on it.

      The page at ``directory_url`` is scanned with ``company_pattern`` and
      then ``top_company_pattern``; each match contributes one entry.

      Returns:
          dict mapping the company name (with ``&amp;`` and ``&#39;``
          decoded to ``&`` and ``'``) to the link text captured from its
          ``/401k-rating/<link>/`` anchor.  When the same name is matched
          more than once, the last match wins.
      """
      # Local import: the file's top-level imports do not provide closing().
      from contextlib import closing

      # closing() guarantees the HTTP response is released even if read()
      # raises; the original left the response object open.
      with closing(urlopen(directory_url)) as response:
          doc = response.read()

      # The patterns are text patterns.  If read() handed back a non-str
      # (bytes) object, decode it first; when read() already returns str
      # this is a no-op.  NOTE(review): UTF-8 is assumed - confirm against
      # the page's declared charset.
      if not isinstance(doc, str):
          doc = doc.decode('utf-8', 'replace')

      res = {}
      for pattern in (company_pattern, top_company_pattern):
          for item in pattern.finditer(doc):
              # Read the groups by name rather than relying on their order.
              link, company = item.group('link', 'name')
              # Same order as before: '&amp;' first, then '&#39;'.
              company = company.replace('&amp;', '&').replace('&#39;', "'")
              res[company] = link
      return res
  62.  
  63.  
  def query_score(directory, key):
      """Print the BrightScope rating of every company whose name contains *key*.

      Args:
          directory: mapping of company name to rating-page link, as
              returned by ``get_directory()``.
          key: substring to look for in company names; the comparison is
              case-insensitive.

      For each matching company a ``Found <name>`` line is printed, its
      rating page (``rating_base_url`` + link) is downloaded, and, if the
      page contains a score, a ``BrightScope Rating: <n>`` line follows.
      Nothing is returned.
      """
      # Local import: the file's top-level imports do not provide closing().
      from contextlib import closing

      needle = key.lower()  # loop-invariant, so computed once
      for company in directory:
          if needle not in company.lower():
              continue

          # Parenthesised single-argument print produces the same output
          # whether print is a statement or a function.
          print("Found %s" % company)

          url = rating_base_url + directory[company]
          # closing() releases the HTTP response even if read() raises.
          with closing(urlopen(url)) as response:
              doc = response.read()
          # Decode only when read() did not already return str.
          # NOTE(review): UTF-8 is assumed - confirm against the page.
          if not isinstance(doc, str):
              doc = doc.decode('utf-8', 'replace')

          found = score_pattern.search(doc)
          if found:
              print("BrightScope Rating: %d" % int(found.group('score')))

Report this snippet  

You need to login to post a comment.