/ Published in: Perl
URL: http://antwrp.gsfc.nasa.gov/apod/
Scrapes the Astronomy Picture of the Day and packages it into a messy RSS feed.
Expand |
Embed | Plain Text
#!/usr/bin/perl
#
# Scrapes the Astronomy Picture of the Day (APOD) archive and prints an
# RSS 2.0 feed of the most recent entries to STDOUT.
#
# Steps:
#   1. Download the archive index and extract (date, page, title) triples.
#   2. Fetch the first $MAX_ITEMS entry pages in parallel.
#   3. For each page fetched successfully, use its second and third <p>
#      elements as the item description and add an item to the feed.
#   4. Print the finished feed.
#
# Dies if the archive index cannot be downloaded or contains no entries.
# Individual entry pages that fail to download are skipped.

use strict;
use warnings;

use LWP::Simple;
use LWP::Parallel::UserAgent;
use XML::RSS;
use HTML::TreeBuilder 3.0;
use Date::Manip;

# Upper bound on the number of archive entries fetched and published.
my $MAX_ITEMS = 20;

my $rss = XML::RSS->new(version => '2.0');
my $url = "http://antwrp.gsfc.nasa.gov/apod/";
my $pua = LWP::Parallel::UserAgent->new();

$rss->channel(
    title       => "Astronomy Picture of the Day",
    language    => "en",
    description => "Discover the cosmos! Each day a different image or photograph of our fascinating universe is featured, along with a brief explanation written by a professional astronomer.",
    webmaster   => "",
    ttl         => "480"
);

# No days are skipped by default; uncomment entries to declare skip days.
$rss->skipDays(
    # day => "Sunday",
    # day => "Monday",
    # day => "Tuesday",
    # day => "Wednesday",
    # day => "Thursday",
    # day => "Friday",
    # day => "Saturday"
);

my $archive = get("http://antwrp.gsfc.nasa.gov/apod/archivepix.html");
die "Could not download the APOD archive index\n" unless defined $archive;

# Entry pages in the order the archive index lists them, plus lookup tables
# keyed by each page's absolute URL.
my @pages;
my %dates;
my %titles;
while ($archive =~ m{(.*?): <a href="(.*?)">(.*?)</a>}g) {
    # Copy the captures right away so later code cannot clobber them.
    my ($date, $page, $title) = ($1, $2, $3);
    push @pages, $page;
    $dates{$url . $page}  = $date;
    $titles{$url . $page} = $title;
}
die "No entries found in the APOD archive index\n" unless @pages;

# Never index past the end of @pages when the archive has fewer entries
# than $MAX_ITEMS.
my $last_index = $#pages < $MAX_ITEMS - 1 ? $#pages : $MAX_ITEMS - 1;
my @links = map { $url . $_ } @pages[0 .. $last_index];

for my $link (@links) {
    my $httprequest = HTTP::Request->new(GET => $link);

    # register() hands back an error response when it rejects a request.
    if (my $error = $pua->register($httprequest)) {
        warn "Could not register $link: ", $error->status_line(), "\n";
    }
}

# wait() blocks until all registered requests are done and returns a hash
# reference of entries, each wrapping one response.
my $entries = $pua->wait();

# Index successful responses by their base URL so that they can be emitted
# in archive order instead of hash order.
my %response_for;
for my $entry (values %{ $entries || {} }) {
    my $response = $entry->response();
    next unless $response && $response->is_success();
    $response_for{ $response->base()->as_string() } = $response;
}

for my $podurl (@links) {
    my $response = $response_for{$podurl} or next;

    my $article = HTML::TreeBuilder->new_from_content($response->content());
    my @chunks  = $article->look_down("_tag", "p");

    # The description is built from the second and third <p> elements;
    # a page with fewer paragraphs yields a shorter (possibly empty) one.
    my $description = join '',
        map  { $_->as_HTML() }
        grep { defined } @chunks[1, 2];

    # Release the parse tree explicitly once the HTML has been extracted.
    $article->delete();

    my %item = (
        title       => $titles{$podurl},
        link        => $podurl,
        description => $description,
        category    => "science",
        author      => "Robert Nemiroff, Jerry Bonnell",
    );

    # Only emit pubDate when the archive date could be parsed.
    my $date = UnixDate(ParseDate($dates{$podurl}), "%g");
    $item{pubDate} = $date if defined $date;

    $rss->add_item(%item);
}

print $rss->as_string();
You need to login to post a comment.
