Home > Uncategorized > check links

check links

March 25th, 2014 Leave a comment Go to comments

This code has not been tested in the last 2 years…

Here is the code:

#!/usr/bin/perl

use Time::HiRes;
use LWP::UserAgent;
use HTML::LinkExtor;

my $start_begin = [ Time::HiRes::gettimeofday( ) ];
my $base_url = $ARGV[0];
my $virtual_host = $ARGV[1];

my $a = LWP::UserAgent->new;
$a->timeout(2);
$a->default_header( "Host" => $virtual_host );

my $start = [ Time::HiRes::gettimeofday( ) ];
my $content = $a->get($base_url);
printf("GET: [%s] | %3s | %.4fs\n",substr($base_url,0,60),$content->code, Time::HiRes::tv_interval($start) );

if ( $content->code == 200 ) {

$parser = HTML::LinkExtor->new();
$parser->parse($content->content);

@links = $parser->links;

foreach $linkarray (@links) {
my @element = @$linkarray;
my $elt_type = shift @element;
#if ( $elt_type =~ /a|img/ ) {
if ( $elt_type =~ /a/i ) {
while (@element) {
my ($attr_name , $attr_value) = splice(@element, 0, 2);
#$seen{$attr_value}++ if ( $attr_value =~ /^$base_url/ );
$seen{$attr_value}++ if ( $attr_value =~ /$virtual_host/ );
#print "[$elt_type] $attr_name $attr_value\n";
}
} # else { print "Excluded [$elt_type] $attr_name $attr_value\n"; }
}

$i=0;
for(sort keys %seen) {
$start = [ Time::HiRes::gettimeofday( ) ];
my $content = $a->get($_);
printf("%02d | [%s] | %3s | %.4fs\n",++$i,substr($_,0,60),$content->code, Time::HiRes::tv_interval($start));
}

}
printf("Total time = %.4fs\n",Time::HiRes::tv_interval($start_begin));

			
Categories: Uncategorized Tags:
  1. No comments yet.
  1. No trackbacks yet.

VAMOVE *

*