Posted By

on 09/06/08


Tagged

rss perl scraping gallup polls broke


Versions (?)

Broken Gallup Election 2008 Polls Scrape to RSS


 / Published in: Perl
 

URL: http://www.gallup.com/poll/election2008.aspx

Scrapes the poll chart images from Gallup's Election 2008 page, and from the topic pages it links to, into a single-item RSS feed.

  #!/usr/bin/perl
  #
  # Scrape the chart images from Gallup's Election 2008 poll page, and from
  # every topic page linked from it, into a single-item RSS feed that is
  # printed on STDOUT.
  #
  # The topic pages are fetched through LWP::Parallel::UserAgent.  A topic
  # page that fails to download, or that lacks the expected markup, is
  # skipped with a warning so that one bad page does not abort the feed.
  # Failure to fetch the base page is fatal.

  use strict;
  use warnings;

  use Date::Manip;
  use HTML::TreeBuilder;
  use HTTP::Request;
  use LWP::Parallel::UserAgent;
  use LWP::Simple;
  use XML::RSS;

  # Return the src attribute of the first <img> found below $element.
  # Returns an empty list when $element is undefined, contains no <img>,
  # or the <img> has no src attribute, so the result can be pushed onto a
  # list unconditionally.
  sub first_image_src {
      my ($element) = @_;
      return unless defined $element;

      # Scalar assignment makes look_down return only the first match.
      my $img = $element->look_down(_tag => 'img') or return;

      # attr() reads the element's own attribute only; attr_get_i() in
      # list context would also return values inherited from ancestors.
      my $src = $img->attr('src');
      return defined $src ? $src : ();
  }

  my $base_url = "http://www.gallup.com/poll/election2008.aspx";

  my $pua = LWP::Parallel::UserAgent->new();
  $pua->agent('Mozilla/5.0 (X11; U; Butt Linux i686; en-US; rv:1.9.0.1) Gecko/2008072820 Firefox/3.0.1');

  # LWP::Simple::get returns undef on failure; without this check the
  # script would quietly emit a feed with no images.
  my $base_html = get($base_url);
  die "Could not fetch $base_url\n" unless defined $base_html;

  my $base_tree = HTML::TreeBuilder->new_from_content($base_html);

  # One image per "election analyses" section on the base page.
  my @images;
  foreach my $section ($base_tree->look_down(_tag => 'div', class => 'section electionanalyses')) {
      push @images, first_image_src($section);
  }

  # Collect the distinct topic links.  The hrefs are used exactly as they
  # appear in the page; a relative link is not resolved against $base_url.
  my %topic_urls;
  my $topics = $base_tree->look_down(_tag => 'div', class => 'section electionanalyses topics');
  if ($topics) {
      foreach my $anchor ($topics->look_down(_tag => 'a')) {
          my $href = $anchor->attr('href');
          $topic_urls{$href} = 1 if defined $href && length $href;
      }
  }
  else {
      warn "No topics section found on $base_url\n";
  }

  # Explicitly release the parsed tree now that it is no longer needed.
  $base_tree->delete();

  foreach my $topic_url (sort keys %topic_urls) {
      my $request = HTTP::Request->new(GET => $topic_url);

      # register() hands back an error response when it cannot queue the
      # request, and a false value otherwise.
      if (my $failure = $pua->register($request)) {
          warn "Could not register $topic_url: ", $failure->status_line, "\n";
      }
  }

  # wait() blocks until the registered requests are done and returns a
  # hash reference of entry objects.
  my $entries = $pua->wait() || {};

  foreach my $entry (values %$entries) {
      my $response = $entry->response();
      unless ($response && $response->is_success) {
          warn "Skipping topic page: ",
              ($response ? $response->status_line : 'no response'), "\n";
          next;
      }

      my $tree = HTML::TreeBuilder->new_from_content($response->content());

      # The chart is expected to be the first image inside the article body.
      my @src = first_image_src(scalar $tree->look_down(_tag => 'div', class => 'cmsbody'));
      warn "No article image found in a topic page\n" unless @src;
      push @images, @src;

      $tree->delete();
  }

  warn "No images were found; the feed item will be empty\n" unless @images;

  # ttl, pubDate, category and author are RSS 2.0 elements, so ask for
  # that version explicitly instead of relying on the module default.
  my $rss = XML::RSS->new(version => '2.0');

  $rss->channel(
      title       => "Gallup Election 2008 Polls",
      link        => $base_url,
      language    => "en",
      description => "Gallup has studied human nature and behavior for more than 70 years. Gallup's reputation for delivering relevant, timely, and visionary research on what people around the world think and feel is the cornerstone of the organization. Gallup employs many of the world's leading scientists in management, economics, psychology, and sociology, and our consultants assist leaders in identifying and monitoring behavioral economic indicators worldwide. Gallup consultants help organizations boost organic growth by increasing customer engagement and maximizing employee productivity through measurement tools, coursework, and strategic advisory services. Gallup's 2,000 professionals deliver services at client organizations, through the Web, at Gallup University's campuses, and in 40 offices around the world.",
      ttl         => "300",
  );

  # Let Date::Manip supply the current time itself rather than parsing the
  # locale-dependent output of an external `date` command.
  my $date = UnixDate(ParseDate("now"), "%g");

  # The whole feed is a single item whose body lists every image found.
  my $description = join '', map { "<img src=\"$_\" /><br />\n" } @images;

  $rss->add_item(
      title       => "Gallup Polls - $date",
      description => $description,
      link        => $base_url,
      pubDate     => $date,
      category    => "politics",
      author      => "Gallup, Incorporated",
  );

  print $rss->as_string();

Report this snippet  

You need to log in to post a comment.