Hello,
I recently wrote a small PHP script which loops through a list of webpages, looks each one up, parses the HTML and saves pieces of text into arrays. However, since it looks up the webpages one by one, it is extremely slow.
I found this function, which aims to look up several webpages at once, and I wanted to give it a try, but I am struggling with how to use it in my code since I am a major newbie to PHP (or any type of programming, for that matter).
Can anyone help me incorporate this into my code?
The function, which can be found here, is:
/**
 * Fetch many URLs concurrently with curl_multi, keeping at most
 * $rolling_window transfers in flight at any one time.
 *
 * A transfer counts as successful only when cURL reports no transport error
 * and the final HTTP status code is 200 (redirects are followed, up to 5).
 *
 * @param array $urls           URLs to fetch. Keys are preserved in the result.
 * @param int   $rolling_window Maximum number of simultaneous transfers.
 *                              Values below 1 are treated as 1.
 * @return array Map of input key => response body (string) on success, or
 *               false when the request failed or never completed. Entries
 *               are in the same order as $urls.
 */
function rolling_curl($urls, $rolling_window = 100) {
    $keys = array_keys($urls);
    $total = count($keys);
    // every URL starts out as "failed"; successful transfers overwrite this
    $results = array_fill_keys($keys, false);
    if ($total === 0) {
        return $results;
    }

    // make sure the rolling window is at least 1 and not greater than the # of urls
    $rolling_window = max(1, min($total, (int) $rolling_window));

    $master = curl_multi_init();

    // add additional curl options here
    $options = array(CURLOPT_RETURNTRANSFER => true,
                     CURLOPT_FOLLOWLOCATION => true,
                     CURLOPT_MAXREDIRS => 5);

    $pending = array(); // position in $keys => curl handle, for transfers in flight
    $next = 0;          // position in $keys of the next URL to start

    do {
        // top the window up with new requests while URLs remain
        while (count($pending) < $rolling_window && $next < $total) {
            $ch = curl_init();
            $options[CURLOPT_URL] = $urls[$keys[$next]];
            // tag the handle with its position so the finished transfer can be
            // matched back to its input key even after redirects
            $options[CURLOPT_PRIVATE] = (string) $next;
            curl_setopt_array($ch, $options);
            curl_multi_add_handle($master, $ch);
            $pending[$next] = $ch;
            $next++;
        }

        while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM);
        if ($execrun != CURLM_OK) {
            break;
        }

        // collect every transfer that has just completed
        $finished = 0;
        while ($done = curl_multi_info_read($master)) {
            $ch = $done['handle'];
            $position = (int) curl_getinfo($ch, CURLINFO_PRIVATE);
            $succeeded = $done['result'] === CURLE_OK
                && curl_getinfo($ch, CURLINFO_HTTP_CODE) == 200;
            if ($succeeded) {
                $results[$keys[$position]] = curl_multi_getcontent($ch);
            }
            // failed transfers are removed too, so they free a slot in the window
            curl_multi_remove_handle($master, $ch);
            unset($pending[$position]);
            $finished++;
        }

        if ($finished === 0) {
            if (!$running) {
                // nothing running and nothing reported: stop rather than spin forever
                break;
            }
            // block until there is socket activity instead of busy-looping
            if (curl_multi_select($master, 1.0) === -1) {
                usleep(1000);
            }
        }
    } while (count($pending) > 0 || $next < $total);

    // detach anything still attached (only possible after an early break)
    foreach ($pending as $ch) {
        curl_multi_remove_handle($master, $ch);
    }
    curl_multi_close($master);

    return $results;
}
My code:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=euc-kr" />
<title>Class 7000</title>
</head>
<body>
<p><u><b><font face="Arial" size="6">7000 Class</font></b></u></p>
<table border="1" cellpadding="0" cellspacing="0" style="border-collapse: collapse" bordercolor="#111111" width="100%" id="AutoNumber1">
<tr>
<td width="10%" align="center" bgcolor="#000000"><b>
<font face="Arial" color="#FFFFFF">Loco</font></b></td>
<td width="10%" align="center" bgcolor="#000000"><b>
<font face="Arial" color="#FFFFFF">Date</font></b></td>
<td width="10%" align="center" bgcolor="#000000"><b>
<font face="Arial" color="#FFFFFF">Allocated from</font></b></td>
<td width="10%" align="center" bgcolor="#000000"><b>
<font face="Arial" color="#FFFFFF">Allocated to</font></b></td>
<td width="10%" align="center" bgcolor="#000000"><b>
<font face="Arial" color="#FFFFFF">Train ID</font></b></td>
<td width="10%" align="center" bgcolor="#000000"><b>
<font face="Arial" color="#FFFFFF">Departure</font></b></td>
<td width="15%" align="center" bgcolor="#000000"><b>
<font face="Arial" color="#FFFFFF">Origin</font></b></td>
<td width="15%" align="center" bgcolor="#000000"><b>
<font face="Arial" color="#FFFFFF">Destination</font></b></td>
<td width="10%" align="center" bgcolor="#000000"><b>
<font face="Arial" color="#FFFFFF">Arrival</font></b></td>
</tr>
<?php
/*
 * LOOK UP EACH LOCO AND PRINT ONE TABLE ROW PER ALLOCATED LOCO
 *
 * For every loco number the status page is downloaded and classified by the
 * byte position of a marker string:
 *   - $needle2 found at $pos_top_banner -> loco is listed as not allocated
 *   - $needle4 found at $pos_oos        -> loco is listed as out of service
 *   - otherwise the fields are cut out of the page at fixed byte offsets
 *     from the first occurrence of $needle1 and a table row is printed.
 * Pages that cannot be downloaded, or that do not contain $needle1, are
 * written to the error log and produce no row.
 */
/*INCLUDE STATION LIST*/
// presumably defines $station (station code => name) -- confirm in stations.php
include("stations.php");
/*INCLUDE TIMETABLE DATA*/
// presumably defines the *_by_train lookup arrays -- confirm in timetable.php
include("timetable.php");
/*SET-UP VARIABLES*/
$train_len = 5;
$date_len = 10;
$from_len = 4;          // not referenced in this script
$to_len = 4;            // not referenced in this script
$train_offset = 755;
$date_offset = 1006;
$from_offset = 1325;
$to_offset = 1571;
$pos_top_banner = 69973;
$pos_oos = 69893;
$needle4_offset = 406;  // not referenced in this script
$needle1 = '</table><br>';
$needle2 = 'top_icon317_1.gif';
$needle3 = '</td>';
$needle4 = '검색 조건에 대한 자료가 없습니다';
$base_url = 'http://logis.korail.go.kr/driveinfo/TrainLocSearchp.jsp?carNo=';
$unallocated = 'Locos not currently allocated: ';
$out_of_service = 'Locos currently out of service: ';
/* FLEET TO BE LOOKED UP*/
// start from a known-empty list so the indices below always match the rows
$loco = array();
$loco[] = "7001";
$loco[] = "7002";
$loco[] = "7003";
//etc. etc.
/* RESULT ARRAYS: exactly one entry is appended per loco, so index $i lines up with $loco[$i] */
$date_array = array();
$from_array = array();
$to_array = array();
$train_array = array();
$depart_time_array = array();
$origin_array = array();
$destination_array = array();
$arrive_time_array = array();
/* LOOP*/
for ($i = 0; $i < sizeof($loco); $i++) {
    /* GRAB FULL SITE HTML*/
    $url = $base_url.$loco[$i];
    $contents = file_get_contents($url);

    /* DEFAULTS USED WHEN THERE IS NOTHING TO SHOW FOR THIS LOCO*/
    $row_date = "-";
    $row_from = "-";
    $row_to = "-";
    $row_train = "-";
    $row_depart = "-";
    $row_origin = "-";
    $row_destination = "-";
    $row_arrive = "-";
    $show_row = false;

    if ($contents === false) {
        // download failed: do not try to parse, just record the problem
        error_log("Loco lookup failed for ".$loco[$i]." (".$url.")");
    } else if (strpos($contents, $needle2) === $pos_top_banner) {
        $unallocated = $unallocated.$loco[$i]." . ";
    } else if (strpos($contents, $needle4) === $pos_oos) {
        $out_of_service = $out_of_service.$loco[$i]." . ";
    } else {
        // all field positions are fixed byte offsets from this anchor
        $anchor = strpos($contents, $needle1);
        if ($anchor === false) {
            // without the anchor the offsets would point at unrelated bytes
            error_log("Unexpected page layout for loco ".$loco[$i]." (".$url.")");
        } else {
            /* ASSIGN TRAIN ID & DATE POSITION*/
            $train_position = $anchor + $train_offset;
            $date_position = $anchor + $date_offset;
            $from_position = $anchor + $from_offset;
            $to_position = $anchor + $to_offset;
            /* ASSIGN TRAIN ID & DATE VALUES*/
            $date = substr($contents, $date_position, $date_len);
            // the "from" cell is variable length: read up to 11 bytes and cut at the closing </td>
            $from = substr($contents, $from_position, 11);
            $new_from_len = strpos($from, $needle3);
            $new_from = substr($from, 0, $new_from_len);
            // the "to" cell is shifted by the length of the "from" value
            $new_to_position = $to_position + $new_from_len;
            $to = substr($contents, $new_to_position, 11);
            $new_to_len = strpos($to, $needle3);
            $new_to = substr($to, 0, $new_to_len);
            $train = chop(substr($contents, $train_position, $train_len));

            // rearranges a 10-character YYYY-MM-DD style date into DD-MM-YY
            $row_date = substr($date,8,2).substr($date,7,1).substr($date,5,3).substr($date,2,2);
            // fall back to the raw station code / "-" when a lookup table has no entry
            $row_from = isset($station[$new_from]) ? $station[$new_from] : $new_from;
            $row_to = isset($station[$new_to]) ? $station[$new_to] : $new_to;
            $row_train = $train;
            $row_depart = isset($departure_time_by_train[$train]) ? $departure_time_by_train[$train] : "-";
            $row_origin = isset($origin_by_train[$train]) ? $origin_by_train[$train] : "-";
            $row_destination = isset($destination_by_train[$train]) ? $destination_by_train[$train] : "-";
            $row_arrive = isset($arrival_time_by_train[$train]) ? $arrival_time_by_train[$train] : "-";
            $show_row = true;
        }
    }

    $date_array[] = $row_date;
    $from_array[] = $row_from;
    $to_array[] = $row_to;
    $train_array[] = $row_train;
    $depart_time_array[] = $row_depart;
    $origin_array[] = $row_origin;
    $destination_array[] = $row_destination;
    $arrive_time_array[] = $row_arrive;

    if ($show_row) {
?>
<tr>
<td width="10%" align="center"><font face="Arial"><?=$loco[$i]?></font></td>
<td width="10%" align="center"><font face="Arial"><?=$date_array[$i]?></font></td>
<td width="10%" align="center"><font face="Arial"><?=$from_array[$i]?></font></td>
<td width="10%" align="center"><font face="Arial"><?=$to_array[$i]?></font></td>
<td width="10%" align="center"><font face="Arial"><?=$train_array[$i]?></font></td>
<td width="10%" align="center"><font face="Arial"><?=$depart_time_array[$i]?></font></td>
<td width="15%" align="center"><font face="Arial"><?=$origin_array[$i]?></font></td>
<td width="15%" align="center"><font face="Arial"><?=$destination_array[$i]?></font></td>
<td width="10%" align="center"><font face="Arial"><?=$arrive_time_array[$i]?></font></td>
</tr>
<?php
    }
}
?>
</table>
<font face="Arial">
<?php
/*
 * REPORT FOOTER
 * Prints the two summary strings built up during the lookup loop, followed
 * by the time (GMT) at which the report was produced.
 */
echo "<br>";
echo $unallocated;
echo "<br><br>";
echo $out_of_service;
echo "<br><br>";
// gmstrftime() is deprecated as of PHP 8.1. This gmdate() format yields the
// same text that "%a %b %d, %Y at %X " produces in the default "C" locale.
echo gmdate('D M d, Y \a\t H:i:s ');
?>
</font>
</body>
</html>