Revision: 3991
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at October 14, 2007 23:49 by iblis
Initial Code
#!/usr/bin/perl -w
use strict;
use warnings;
use Getopt::Std;
use LWP::Simple;
use HTML::Parser;

#
# Grab all links from local or remote html file
# perl html munging
#
# Prints the href of every <a> tag, one per line, in document order.
#
#   -a  print only absolute urls (href starts with http:// or https://)
#   -r  print only relative urls (every other href)
#
# The two options are mutually exclusive; with neither, all hrefs are printed.
#

# One anchored, case-insensitive pattern decides what counts as "absolute".
# It is shared by the argument check and by both filters so that -a and -r
# always partition the links the same way.
my $ABSOLUTE_URL = qr{\A https?://}xi;

# get options and argument
#
my %opts;
getopts('ar', \%opts);
my $arg = shift;
die "Usage: $0 [-a | -r] filename [| URL]\n"
    if (not defined $arg or $opts{a} && $opts{r});    # allow either -a or -r

# get the page either from file or url
#
my $page;
if ($arg =~ $ABSOLUTE_URL) {
    # LWP::Simple::get returns undef on failure and does not set $!, so test
    # definedness (an empty page is still a valid page) and leave $! out of
    # the message.
    $page = get($arg);
    die "Couldn't get $arg\n" unless defined $page;
}
else {
    open my $fh, '<', $arg or die "Couldn't open $arg: $!\n";
    $page = do { local $/; <$fh> };    # slurp the whole file
    close $fh;
}

# set the parser and parse
#
my @links;    # hrefs collected by start(), in document order

my $parser = HTML::Parser->new(
    api_version => 3,
    start_h     => [\&start, "tagname, attr"],
);

# Start-tag handler registered with HTML::Parser.
#   $tag  - tag name as passed by the parser ("tagname" argspec)
#   $attr - hash reference of the tag's attributes ("attr" argspec)
# Appends the href of each <a> tag to @links, honouring the -a / -r filters.
sub start {
    my ($tag, $attr) = @_;

    return unless $tag eq 'a' and defined $attr->{href};

    my $is_absolute = $attr->{href} =~ $ABSOLUTE_URL;
    return if $is_absolute  and $opts{r};    # exclude absolute url when -r
    return if !$is_absolute and $opts{a};    # exclude relative url when -a

    push @links, $attr->{href};
    return;
}

$parser->parse($page);
$parser->eof;

# output
#
print "$_\n" for @links;
Initial URL
Initial Description
Initial Title
Grab all links from local or remote html file
Initial Tags
html
Initial Language
Perl