Revision: 11183
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at January 27, 2009 12:00 by iblis
Initial Code
#!/usr/bin/env perl
#
# grabcode.pl
# Download code between <pre> tags from remote HTML pages
# Takes a list of urls as argument
use strict; use warnings;
use WWW::Mechanize;
use HTML::TreeBuilder::XPath;
use Encode;
# URLs to scrape, taken verbatim from the command line.
my @urls = @ARGV;

# autocheck is switched off so that a failed request hands back a response
# object instead of dying; the loop below can then warn and carry on with
# the remaining URLs rather than aborting the whole batch.
my $browser = WWW::Mechanize->new( autocheck => 0 );
$browser->agent_alias('Linux Mozilla');
#$browser->credentials('uname', 'passwd');

# For each URL: fetch the page, then print the text of every <pre> element
# to STDOUT as UTF-8, one newline after each element.
foreach my $url (@urls) {
    my $response = $browser->get($url);
    if ( !$response->is_success() ) {
        # Method calls are not interpolated inside double-quoted strings,
        # so the status line has to be concatenated explicitly.
        warn "Skipping $url:\n" . $response->status_line() . "\n";
        next;
    }

    my $tree = HTML::TreeBuilder::XPath->new;

    # parse_content() parses the document and signals end-of-input in one
    # call; a bare parse() without a following eof() leaves the tree
    # unfinalised.
    $tree->parse_content( $browser->content() );

    # In list context findnodes() returns the matching nodes directly.
    for my $node ( $tree->findnodes('//pre') ) {
        print encode( "utf8", $node->as_text() ), "\n";
    }

    # Release the tree explicitly before moving on to the next page.
    $tree->delete;
}
Initial URL
Initial Description
Won't work with Google Code pages: they are JavaScript-powered, so the code is not present in the downloaded HTML.
Initial Title
Batch download code between <pre> tags from remote HTML pages
Initial Tags
html, download, web
Initial Language
Perl