/ Published in: Perl

URL: http://www.theonion.com/content/topics/American+Voices
Old and busted but still gets the job done.
Expand |
Embed | Plain Text
#!/usr/bin/perl
#
# whatdoyouthink.pl - scrape The Onion's "American Voices" topic page and
# print an RSS 2.0 feed on standard output, one <item> per linked article.
#
# For every <a href="..." class="plain"> link on the index page the article
# is fetched, its <h2 class="title"> heading becomes the item title, and the
# markup between the heading and <div id="amvo_below"> becomes the item
# description (wrapped in CDATA).
#
# On any fetch or parse failure the script appends an error <item>, closes
# the feed so the output stays well-formed XML, reports the problem on
# STDERR and exits non-zero.

use strict;
use warnings;

use HTML::Entities qw(decode_entities encode_entities_numeric);
use LWP::Simple;

my $SITE_ROOT = 'http://www.theonion.com';
my $INDEX_URL = "$SITE_ROOT/content/topics/American+Voices";

# Turn a scraped HTML fragment into text that is safe inside an XML element.
# HTML named entities are not defined in XML, so they are decoded first and
# then everything unsafe is re-encoded as numeric character references.
sub xml_text {
    my ($html) = @_;
    return encode_entities_numeric(decode_entities($html));
}

# Emit an error item, close the feed on STDOUT, then die.
# The feed is closed here because die() only writes to STDERR; without this
# the consumer of STDOUT would be left with a truncated, unparsable document.
sub abort_feed {
    my ($message) = @_;
    print "<item><title>whatdoyouthink.pl - ",
          encode_entities_numeric($message),
          "</title></item>\n",
          "</channel>\n",
          "</rss>";
    die "whatdoyouthink.pl - $message\n";
}

print "<?xml version=\"1.0\"?>\n",
      "<rss version=\"2.0\">\n",
      "<channel>\n",
      "<title>The Onion - What Do You Think?</title>\n",
      "<link>$INDEX_URL</link>\n",
      "<description>The Onion's place to vent anger at All of Us USA.</description>\n",
      "<ttl>180</ttl>\n",
      "<skipDays>\n",
      "\t<day>Saturday</day>\n",
      "\t<day>Sunday</day>\n",
      "</skipDays>\n",
      "<category>Humor</category>\n",
      "<language>en-us</language>\n",
      "\n";

# LWP::Simple::get returns undef when the request fails.
my $index_html = get($INDEX_URL);
abort_feed("Error fetching $INDEX_URL!") unless defined $index_html;

while ($index_html =~ m{<a href="(.*?)" class="plain">.*?</a>}g) {
    # Copy the capture immediately: later matches overwrite $1.
    my $href = $1;

    # The href is HTML attribute text, so entities such as &amp; must be
    # decoded before the value is used as a real URL.
    my $url = $SITE_ROOT . decode_entities($href);

    my $article = get($url);
    abort_feed("Error fetching $url!") unless defined $article;

    my ($title, $intro) =
        $article =~ m{<h2 class="title">(.*?)</h2>(.*?)<div id="thumbs">}s
        or abort_feed('Error grabbing article title and heading!');

    my ($content) =
        $article =~ m{<div id="thumbs">(.*?)<div id="amvo_below">}s
        or abort_feed('Error grabbing article text!');

    # A literal "]]>" inside the scraped HTML would end the CDATA section
    # early; split it across two adjacent CDATA sections instead.
    my $body = $intro . $content;
    $body =~ s/\]\]>/]]]]><![CDATA[>/g;

    print "<item>\n",
          "<title>", xml_text($title), "</title>\n",
          "<link>", encode_entities_numeric($url), "</link>\n",
          "<category>Humor</category>\n",
          "<description>\n",
          "<![CDATA[", $body, "]]>\n",
          "</description>\n",
          "</item>\n\n";
}

print "</channel>\n",
      "</rss>";
You need to login to post a comment.