/ Published in: Perl
URL: http://mother3.fobby.net/
This one I like. It scrapes the blog and fixes its RSS feed to include images, video, mini updates and whatnot. Images are still busted and I was too lazy to fix them; that is left as an exercise for the reader.
Expand |
Embed | Plain Text
#!/usr/bin/perl
#
# Rebuilds the Mother 3 fan-translation feed so that every <item> carries the
# full blog post instead of the abbreviated text shipped by the feed.
#
# Steps:
#   1. Download the feed and parse it into an XML tree.
#   2. Queue one GET request per <item> link and fetch them all in parallel.
#   3. For every page that came back, rebuild the item's <content:encoded>
#      element from the page markup plus the highlighted "Mato Comments".
#   4. Print the modified feed to STDOUT.
#
# Known limitation (inherited from the original script): image references in
# the copied markup are not rewritten, so relative image URLs stay broken.

use strict;
use warnings;

use Encode;
use HTML::TreeBuilder;
use HTTP::Request;
use LWP::Parallel::UserAgent;
use LWP::Simple;
use XML::TreeBuilder;

my $feed_url = "http://feeds.feedburner.com/Mother3FanTranslation?format=xml";

my $ua = LWP::Parallel::UserAgent->new();

# LWP::Simple::get returns undef on any failure; stop early instead of
# handing undef to the XML parser.
my $xml = get($feed_url);
defined $xml or die "Could not fetch feed $feed_url\n";

my $atomfeed = XML::TreeBuilder->new();
$atomfeed->parse($xml);

# Map "URL as requested" => feed <item>, and queue one request per item.
my %entries;
foreach my $item ($atomfeed->look_down("_tag", "item")) {
    my $link_node = $item->look_down("_tag", "link");
    next unless $link_node;

    my $link = $link_node->as_text();
    $link =~ s/\A\s+|\s+\z//g;
    next unless length $link;

    my $request = HTTP::Request->new(GET => $link);

    # Key on the URI as the request object reports it, so the lookup after
    # the fetch compares like with like.
    $entries{ $request->uri()->as_string() } = $item;

    # register() returns an error response when the request cannot be queued.
    if (my $failure = $ua->register($request)) {
        warn "Could not queue $link: ", $failure->message(), "\n";
    }
}

# wait() blocks until all queued requests are done and returns a hash
# reference of entry objects, each wrapping one response.
my $responses = $ua->wait();

foreach my $entry (values %{ $responses || {} }) {
    my $response = $entry->response();
    unless ($response->is_success()) {
        warn "Fetch failed: ", $response->status_line(), "\n";
        next;
    }

    # Find the feed item this page belongs to. Try the response base first
    # (what the original script used), then walk back through any redirect
    # chain, because the final URL may differ from the link in the feed.
    my $item = $entries{ $response->base()->as_string() };
    for (my $hop = $response; $hop && !$item; $hop = $hop->previous()) {
        my $hop_request = $hop->request() or next;
        $item = $entries{ $hop_request->uri()->as_string() };
    }
    next unless $item;

    my $description = $item->look_down("_tag", "content:encoded");
    next unless $description;
    $description->delete_content();

    # Decode once; everything below works on character strings.
    my $blogentry = decode_utf8($response->content());
    my $html      = HTML::TreeBuilder->new_from_content($blogentry);

    # Collect the paragraphs of the highlighted comment boxes (identified by
    # the DCB6B6 colour in their inline style) as block quotes.
    my $comments = "<br /><br />Mato Comments:<br />";
    foreach my $box ($html->look_down("_tag", "div", "style", qr/DCB6B6/)) {
        foreach my $paragraph ($box->look_down("_tag", "p")) {
            $comments .= "<blockquote>" . $paragraph->as_HTML() . "</blockquote>";
        }
    }

    # Re-parse page + comments so the markup can be cleaned up as one tree.
    my $blogtree = HTML::TreeBuilder->new_from_content($blogentry . $comments);

    # Drop the horizontal-rule divs.
    foreach my $hrdiv ($blogtree->look_down("_tag", "div", "class", "hr")) {
        $hrdiv->delete();
    }

    # Pushed as text, so the HTML is escaped when the feed is serialised.
    $description->push_content($blogtree->as_HTML());

    # HTML::TreeBuilder trees should be deleted explicitly once done with.
    $blogtree->delete();
    $html->delete();
}

# Emit the rebuilt feed.
binmode STDOUT, ":encoding(UTF-8)";
print $atomfeed->as_XML();
You need to login to post a comment.
