I'm trying to dereference the @{$links} produced
by WWW::SimpleRobot and am having a heck of
a time getting it done. Can anybody help?
You can see some of the things I have tried
below.
I know I can do this link extraction myself with
HTML::LinkExtor, or at least I think I can, but
I'd like to know how to dereference the links in this script.
Mike Flannigan
#
#
#
#!/usr/local/bin/perl
#
use strict;
use warnings;
use WWW::SimpleRobot;
# Build a robot that starts at the Port of Houston home page, follows only
# links that stay on that site, and reports every page visited together with
# the links found on it.
my $robot = WWW::SimpleRobot->new(
    URLS => ['http://www.portofhouston.com/'],

    # Must end in a single "/": a pattern ending in "//" never matches an
    # ordinary same-site URL such as http://www.portofhouston.com/index.html,
    # so no link would ever be followed.  Dots are escaped so they match a
    # literal "." only.
    FOLLOW_REGEX => '^http://www\.portofhouston\.com/',
    DEPTH        => 1,
    TRAVERSAL    => 'depth',

    # Called once per visited page with the page URL, its depth, the raw
    # HTML, and a reference to the list of links found on the page.
    VISIT_CALLBACK => sub {
        my ( $url, $depth, $html, $links ) = @_;
        print STDERR "Visiting $url\n\n";

        # Uncomment for more detail while debugging:
        # print STDERR "Depth = $depth\n";
        # print STDERR "HTML = $html\n";

        # NOTE(review): the entries in @{$links} look like the records
        # returned by HTML::LinkExtor->links, i.e. each entry is itself an
        # array reference of the form [ $tag, $attr => $url, ... ] -- confirm
        # against the installed WWW::SimpleRobot.  Printing such an entry
        # directly yields "ARRAY(0x...)", so each one is dereferenced a
        # second time here.  Plain (non-reference) entries are printed as-is.
        for my $link ( @{ $links || [] } ) {
            if ( ref $link eq 'ARRAY' ) {
                my ( $tag, @attr_url_pairs ) = @{$link};

                # Walk the attribute/URL pairs in their original order.
                while ( my ( $attr, $link_url ) = splice @attr_url_pairs, 0, 2 ) {
                    print STDERR "$link_url\n";
                }
            }
            else {
                print STDERR "$link\n";
            }
        }
    },

    # Called for every link that could not be fetched, with the broken URL,
    # the page that linked to it, and the depth at which it was found.
    BROKEN_LINK_CALLBACK => sub {
        my ( $url, $linked_from, $depth ) = @_;

        # Keep the message on one line; a raw newline inside the string
        # literal would split the report in two.
        print STDERR "$url looks like a broken link on $linked_from\n";
        print STDERR "Depth = $depth\n";
    },
);
# Run the crawl; the callbacks registered on $robot fire as pages are visited.
$robot->traverse();

# Both accessors are dereferenced as array references and copied into
# plain arrays.
my @urls  = @{ $robot->urls() };
my @pages = @{ $robot->pages() };

# Each page record is accessed as a hash reference.  The fields are only
# unpacked here; nothing is done with them yet.
foreach my $page (@pages) {
    my ( $url, $depth, $modification_time ) =
        @{$page}{qw(url depth modification_time)};
}

print "\nAll done.\n";
__END__
--
To unsubscribe, e-mail: beginners-unsubscr...@perl.org
For additional commands, e-mail: beginners-h...@perl.org
http://learn.perl.org/