/ Published in: Perl
URL: http://www.mnftiu.cc/mnftiu.cc/war.html
A really old and busted "Get Your War On" scraper, but it still works, so there.
Expand |
Embed | Plain Text
#!/usr/bin/perl
#
# Scrape the "Get Your War On" webcomic site and print an RSS 0.9 (RDF)
# feed to STDOUT, with one <item> per comic strip found on the most recent
# archive page.
#
# How it works:
#   1. Fetch the index page and count the numbered "warNN.html" links.
#   2. Derive the newest page's number from that count (see note below).
#   3. Fetch that page and emit one feed item per strip image on it.
#
# Both pages are fetched BEFORE anything is printed, so a network failure
# ends the script with an error instead of leaving a truncated XML document
# on STDOUT.

use strict;
use warnings;

use HTML::Entities;
use LWP::Simple;

my $site_base = 'http://www.mnftiu.cc/mnftiu.cc';
my $index_url = "$site_base/war.html";

# Fetch a page and return its content; die with the offending URL when the
# request fails (LWP::Simple::get signals failure by returning undef).
sub fetch_page {
    my ($page_url) = @_;

    my $content = get($page_url);
    die "Could not fetch $page_url\n" unless defined $content;

    return $content;
}

# Escape text for use as XML character data.  The explicit character set
# limits encode_entities to the entities XML itself predefines; its default
# set would also emit HTML-only names (e.g. &eacute;) that XML parsers reject.
sub xml_escape {
    my ($text) = @_;

    return encode_entities($text, '<>&"');
}

# --- Locate the newest archive page ---------------------------------------

my $index_html = fetch_page($index_url);

# Count the numbered archive links.  The "= () =" idiom forces list context
# so that the scalar receives the number of matches.
my $link_count = () =
    $index_html =~ m!<a href="war\d{1,2}\.html">\d{1,2}</a>!g;

# NOTE(review): the newest page is taken to be (number of archive links + 2),
# as in the original script; presumably this reflects how the site numbers
# its pages -- confirm against the live index if the layout changes.
my $latest_page = 2 + $link_count;
my $url         = "$site_base/war${latest_page}.html";

# --- Collect the strips on that page ---------------------------------------

my $page_html = fetch_page($url);

# Each strip is an image named images/gywo.<id>.gif; capture the <id> part.
# [^"] keeps the match inside a single src attribute value.
my @strip_ids =
    $page_html =~ m!<img src="images/gywo\.([^"]*?)\.gif" border=0>!g;

# --- Emit the feed ----------------------------------------------------------

print <<"END_HEADER";
<?xml version="1.0" encoding="ISO-8859-1"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns="http://my.netscape.com/rdf/simple/0.9/">
<channel>
 <title>Get Your War On</title>
 <link>$index_url</link>
 <description>A webcomic about our 9/11 epilogue.</description>
</channel>

END_HEADER

for my $strip_id (@strip_ids) {
    # The description carries HTML markup, so it must be entity-escaped;
    # a raw, unclosed <img> tag would make the feed ill-formed XML.
    my $img_tag = qq{<img src="$site_base/images/gywo.${strip_id}.gif">};

    print "<item>\n",
          "<title>", xml_escape($strip_id), "</title>\n",
          "<link>",  xml_escape($url),      "</link>\n",
          "<description>", xml_escape($img_tag), "</description>\n",
          "</item>\n\n";
}

print "</rdf:RDF>\n";
You need to login to post a comment.
