hello dear php-experts
today i have not (!) a php-question - but a perl-one.
what is aimed: i want to do a search to find out all urls that contains the following term: /participants-database/
but unfortunatley this does not work :
#!C:\Perl\bin\perl
use strict; # You always want to include both strict and warnings
use warnings;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTML::LinkExtor;
# There was no reason for this to be in a BEGIN block (and there
# are a few good reasons for it not to be)
open my $file1,"+>>", ("links.txt");
select($file1);
#The Url I want it to start at;
# Note that I've made this an array, @urls, rather than a scalar, $URL
#my @urls = (' $url =~ s$||;');
my $urls =~ ('s|/participants-database$||');
my %visited; # The % sigil indicates it's a hash
my $browser = LWP::UserAgent->new();
$browser->timeout(5);
while (@urls) {
my $url = shift @urls;
# Skip this URL and go on to the next one if we've
# seen it before
next if $visited{$url};
my $request = HTTP::Request->new(GET => $url);
my $response = $browser->request($request);
# No real need to invoke printf if we're not doing
# any formatting
if ($response->is_error()) {print $response->status_line, "\n";}
my $contents = $response->content();
# Now that we've got the url's content, mark it as
# visited
$visited{$url} = 1;
my ($page_parser) = HTML::LinkExtor->new(undef, $url);
$page_parser->parse($contents)->eof;
my @links = $page_parser->links;
foreach my $link (@links) {
print "$$link[2]\n";
push @urls, $$link[2];
}
sleep 60;
}
any idea!
my $url =~s|/bar$||;
well i tried to leave out the "my",
that was a mistake
The "my" causes a new $url to be created.
What we want is to modify the old $url.
but - unfortunatly this does not work