Why is the string empty when calling Perl from PHP?

paulchen · Nov 26, 2001

Hi there:
Could anybody help me figure out this problem? I call a Perl script from PHP. The Perl script is executed, but nothing comes back in the variable. The PHP code looks like this:
<?php $command="perl /home/zhong/perl/205/listElements.pl http://www.cnn.com 1";

// system() passes the command's output straight through and returns only its
// last line (FALSE on failure), so $wholeString never holds the whole output.
$wholeString=system($command);
?>

Thanks all.

Paul

My perl script is:
#!/usr/bin/perl

use IO::File;
use IO::Seekable;

require HTTP::Request;
require LWP::UserAgent;

# Crawler state, shared as package globals with the subs further down:
our @unvisited = ();    # in-scope URLs that were found but not fetched yet
our @master    = ();    # every distinct URL found so far
our @final     = ();    # the result list
our @mail      = ();    # mailto: addresses found while parsing

# Command line: <url> <flag>
#   flag 1 - list the links of the single page <url>
#   flag 0 - crawl recursively, starting at <url>
my ($start_url, $flag) = @ARGV;

# The flag is checked as a string first: with a plain numeric "==" a missing
# or non-numeric flag compares equal to 0 and would silently start a crawl.
if (!defined $start_url || !defined $flag || $flag !~ /\A[01]\z/) {
    print "This is invalid argument.\n";
}
elsif ($flag == 1) {    # no recursion: only the given page
    parsePage($start_url);

    print "The master array size is: ".scalar(@master)." elements.\n";
    print "The final array size is: ".scalar(@final)." elements.\n";
    print "The unvisited array size is: ".scalar(@unvisited)." elements.\n";
}
else {                  # recursion: follow every in-scope link
    parsePage_recursion($start_url);

    foreach my $element (@final) {
        print "$element\n";
    }
    print "The master array size is: ".scalar(@master)." elements.\n";
    print "The unvisited array size is: ".scalar(@unvisited)." elements.\n";
    #print @mail;
    print "The final array size is: ".scalar(@final)." elements.\n";
}

#------------------------------------------------------------------------------#
#-------------------------------Sub-routine------------------------------------#
#------------------------------------------------------------------------------#

# parsePage($url) looks for all links on ONE page. For every resource found on
# the page it does the following:
#--1) Check to see if the resource has already been found.
#--2) If the resource is not in @master, enter it into @master.
#--3) If the resource lies under the page's base URL, enter it into @unvisited.
#--4) Loop until all resources on the page have been examined.
#--5) Once the page is done, move the collected URLs from @unvisited into
#     @final (leaving @unvisited empty) and print them, one per line.
sub parsePage
{
    my ($ele) = @_;

    _scan_page($ele);

    @final     = @unvisited;    # the in-scope links of this page are the result
    @unvisited = ();            # nothing is left to visit in single-page mode

    foreach my $element (@final) {
        print "$element\n";
    }
    return;
}

# parsePage_recursion($url) crawls: it scans $url, then keeps taking the next
# URL off the front of @unvisited, records it in @final, prints a progress
# line ("<url>**<last index of @unvisited>**") and scans that page too, until
# @unvisited is empty.
#
# Returns the string "Exit resursion." (spelling kept as it was) once there is
# nothing left to visit.
#
# The original called itself once per page; this loop visits the pages in the
# same order without growing the call stack by one frame per page.
sub parsePage_recursion
{
    my ($ele) = @_;

    my $current = $ele;
    while (1) {
        _scan_page($current);

        return "Exit resursion." if !@unvisited;

        $current = shift(@unvisited);
        push(@final, $current);
        print $current."**".$#unvisited."**\n";
    }
}

# _scan_page($page) - shared worker of parsePage and parsePage_recursion.
# Fetches $page and records every href=/src= target it contains:
#   * "javascript:" targets are skipped,
#   * "mailto:" targets go to @mail (once each),
#   * everything else is made absolute, added to @master if it is new and,
#     when it lies under the page's base URL, queued in @unvisited as well.
# Returns nothing; a page that cannot be validated or fetched is skipped.
sub _scan_page
{
    my ($page) = @_;

    my $url = validate($page);
    return if !defined $url || $url eq '';
    $url = reduceSlash($url);

    my ($root, $base) = _root_and_base($url);

    my $ua       = LWP::UserAgent->new;
    my $response = $ua->request(HTTP::Request->new(GET => $url));
    if (!$response->is_success) {
        # Do not mine an error page for links.
        warn "Cannot fetch $url: ".$response->status_line."\n";
        return;
    }
    my $html = $response->content();
    return if !defined $html;

    # One pass over the whole document: attribute name in any case, value in
    # double quotes, single quotes or unquoted.
    while ($html =~ m{\b(?:href|src)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'<>]+))}gi) {
        my ($link) = grep { defined $_ } ($1, $2, $3);
        $link =~ s/\A\s+//;
        $link =~ s/\s+\z//;

        next if $link =~ /\Ajavascript:/i;

        if ($link =~ /\Amailto:(.*)\z/is) {
            my $address = $1;
            push(@mail, $address)
                if length($address) > 0 && !_in_list($address, \@mail);
            next;
        }

        $link =~ s/#.*\z//s;    # a fragment names a spot inside a page, not a new resource
        next if $link eq '';

        my $found = _absolute_url($link, $root, $base);
        next if _in_list($found, \@master);

        push(@master, $found);  # every single resource is added to master

        # Only URLs that really start with the base are in scope; a base that
        # merely shows up somewhere inside another URL does not count.
        push(@unvisited, $found)
            if $found eq $base || index($found, "$base/") == 0;
    }
    return;
}

# _root_and_base($url) - split an "http://" URL (no trailing slash) into
#   root: "http://host"
#   base: the URL itself, or its parent directory when the last path segment
#         contains a "." (i.e. looks like a file such as index.html)
# Neither value ends with a slash.
sub _root_and_base
{
    my ($url) = @_;

    my ($root, $path) = $url =~ m{\A(http://[^/]*)(.*)\z}s;
    if (!defined $root) {       # not expected after validate(); stay defensive
        $root = $url;
        $path = '';
    }

    my $base = $url;
    $base =~ s{/\z}{};
    $base =~ s{/[^/]*\z}{} if $path =~ m{/[^/]*\.[^/]*\z};

    return ($root, $base);
}

# _absolute_url($link, $root, $base) - turn a link as written in the page into
# an absolute URL:
#   "scheme://..."  is returned unchanged
#   "//host/..."    gets "http:" put in front
#   "/path"         is appended to $root
#   "../x"          climbs one directory of $base per "../" (never above $root)
#   anything else   is appended to $base
sub _absolute_url
{
    my ($link, $root, $base) = @_;

    return $link         if $link =~ m{\A[A-Za-z][A-Za-z0-9+.\-]*://};
    return "http:".$link if $link =~ m{\A//};
    return $root.$link   if $link =~ m{\A/};

    my $dir = $base;
    while ($link =~ s{\A\.\./}{}) {
        $dir =~ s{/[^/]*\z}{} if length($dir) > length($root);
    }
    return $dir."/".$link;
}

# _in_list($item, \@list) - true (1) when $item is already an element of the
# list (string comparison), otherwise 0.
sub _in_list
{
    my ($item, $list) = @_;

    foreach my $known (@{$list}) {
        return 1 if $known eq $item;
    }
    return 0;
}

#sub--duplicate: To check whether the object is in the array already,
#                i.e. to make sure a string stays unique.
#   $elem  - the string to look for (compared with eq)
#   $list  - optional array reference to search; defaults to the global @master
# Returns the number of elements equal to $elem (0 means "not stored yet").
sub duplicate {
    my ($elem, $list) = @_;
    $list = \@master unless defined $list;

    my $flagCount = 0;
    foreach my $known (@{$list}) {
        $flagCount++ if $known eq $elem;
    }
    return $flagCount;
}

# replace($text1, $to_replace, $replace_with)
# Returns a copy of $text1 in which the FIRST occurrence of $to_replace has
# been exchanged for $replace_with. The caller's string is not modified.
# When $to_replace does not occur, the text is returned unchanged.
sub replace
{
    my ($text1, $to_replace, $replace_with) = @_;

    my $position = index($text1, $to_replace);

    # index() yields -1 for "not found"; handing that to substr() would
    # overwrite the end of the string instead of leaving it alone.
    return $text1 if $position < 0;

    substr($text1, $position, length($to_replace), $replace_with);

    return $text1;
}

# shrinkNum($path) - count how many "../" steps the string starts with.
# Only leading steps count; a "../" further inside the string is ignored.
sub shrinkNum
{
    my ($remaining) = @_;

    my $levels = 0;
    # Peel one leading "../" off the private copy per round.
    $levels++ while $remaining =~ s{\A\.\./}{};

    return $levels;
}

# slashNum($string) - return the number of "/" characters in the string.
sub slashNum
{
    my ($text) = @_;

    # tr with an empty replacement list only counts; $text stays untouched.
    my $total = ($text =~ tr{/}{});

    return $total;
}
#$aString="../../../../news1";
#print "The shrinking number is: ".shrinkNum($aString);

# shrinkString($shrinkingString, $shrinkingLayer)
# Cuts $shrinkingLayer trailing "/segment" parts off the string and returns
# what is left, e.g. ("http://www.cnn.com/new1/new2", 1) gives
# "http://www.cnn.com/new1".
# Stops early when no "/" is left, so the string is never cut any further
# than its last remaining slash.
sub shrinkString
{
    my ($shrinkingString, $shrinkingLayer) = @_;

    while ($shrinkingLayer > 0) {
        my $lastSlash = rindex($shrinkingString, "/");

        # rindex() yields -1 when there is no slash; substr(..., 0, -1) would
        # then chop off the last character instead of a path segment.
        last if $lastSlash < 0;

        $shrinkingString = substr($shrinkingString, 0, $lastSlash);
        $shrinkingLayer--;
    }
    return $shrinkingString;
}

#$bString="http://www.cnn.com/new1/new2/new3/new4/new5";
#print shrinkString($bString,4);

#To check whether the argv (first element) is valid.
#   - a "#fragment" is taken off (a "?query" is kept as it is)
#   - a string starting with "http://" is returned as it is
#   - a string starting with "www" gets "http://" put in front
#Returns the usable URL, or undef when the argument is neither of the two.
sub validate
{
    my ($argv) = @_;

    return if !defined $argv;

    # Drop the whole fragment: removing only the "#" character would glue the
    # fragment name onto the path.
    $argv =~ s/#.*\z//s;

    # The result is built locally on every call, so an invalid argument can
    # never hand back the URL of an earlier call.
    return $argv           if index($argv, "http://") == 0;
    return "http://".$argv if index($argv, "www") == 0;

    return;
}

#To check whether the second element (the recursion flag) is valid.
#Returns 0 when the flag is exactly 0 or 1, and 1 for anything else,
#including an undefined, empty or non-numeric flag.
sub validate_flag
{
    my ($flag) = @_;

    # Checked as a string: with a numeric "==" every non-numeric value
    # compares equal to 0 and would pass as valid.
    if (defined $flag && $flag =~ /\A[01]\z/) {
        return 0;
    } else {
        return 1;
    }
}

# reduceSlash($string) - return the string without ONE trailing "/".
# A string that does not end in "/" comes back unchanged.
sub reduceSlash
{
    my ($url) = @_;

    my $keep = length($url);
    $keep-- if $url =~ m{/\z};      # leave the final slash out of the copy

    return substr($url, 0, $keep);
}

Anon · Nov 27, 2001

I don't think system() returns all the output that the command produces...

You might want to try passthru(), or have the Perl script write its output to a file and then read the file (slow...)

what you should REALLY do is just port it to PHP!!!

paulchen · Nov 27, 2001

Hi, michael:
Oh, now I know what's wrong with that. I need to modify the code like this:
<?php $command="perl /home/zhong/perl/205/listElements.pl http://www.cnn.com 1";

// NOTE(review): this snippet reads exactly like the one in the opening post;
// whatever was modified is not visible here - confirm against the original thread.
$wholeString=system($command);
?>

Now, it works very well.
Thanks michael.

Paul