Hello,
First let me say I am not really a programmer or fluent in PHP...I am somewhat familiar with some basics though...
Below is a class (script) I found on the web and I am interested in learning how to use it to grab and parse some data from some sites
To use this class (correct me if I'm wrong) I am thinking I will need to write a script that "requires" this class? ...the script will have to cite a specific URL where it will collect the data in a temp file in a temp dir (see line 26)
function Grabber($url,$tmpdir='tmp/',$tmpfile='tmp.txt',$ifmod=86400)
beyond that I am about clueless about what I need to do to use this class/script...
I am familiar with the code required to open and write files such as flat file databases etc if this is what is required...
I don't expect anyone to entirely write the required script to take advantage of this class but any help that will get me started would be greatly appreciated...
Thanks in advance.
here is the class:
<?php
/*******************************************************************************
* grabber.php
* by voyager, 2003
*
* A class which is useful for grabbing any information from any site over the net.
* It can return a single value (text) or an array (texts) with given 'markup'
* strings. The class uses the PHP cURL functions, so you need cURL installed on your
* server
********************************************************************************/
/**
 * Downloads a page with cURL and extracts the text that sits between
 * caller-supplied marker strings.
 *
 * Usage sketch:
 *   $g = new Grabber($url);
 *   $g->grab_unit($start, $end);                         // result in $g->searchtxt
 *   $g->grab_array($start, $delimStart, $delimEnd, $end); // result in $g->searchar
 *   $g->refine_array($innerStart, $innerEnd, 1);          // trims every element
 *   $g->remove_trash(array('&nbsp;', "\n"));              // strips unwanted substrings
 *
 * All string handling is byte-wise (strpos/substr), not multibyte-aware.
 */
class Grabber
{
    /** @var string Body of the fetched URL; '' when the download failed. */
    public $content = '';

    /** @var mixed Not read or written by any method of this class. */
    public $content_array;

    /** @var mixed Not read or written by any method of this class. */
    public $noURL;

    /** @var string Text between the start and end markers of the last grab_*() call. */
    public $text = '';

    /** @var array Pieces produced by grab_array(), as modified by refine_array()/remove_trash(). */
    public $searchar = array();

    /** @var string Result of grab_unit(), as modified by remove_trash(). */
    public $searchtxt = '';

    /**
     * Fetches $url and stores the response body in $this->content.
     *
     * NOTE: $tmpdir, $tmpfile and $ifmod are accepted for backward
     * compatibility but are ignored: this class performs no file caching,
     * the URL is always downloaded.
     *
     * @param string $url     Address to download.
     * @param string $tmpdir  Ignored.
     * @param string $tmpfile Ignored.
     * @param int    $ifmod   Ignored.
     */
    public function __construct($url, $tmpdir = 'tmp/', $tmpfile = 'tmp.txt', $ifmod = 86400)
    {
        $this->content = self::fetch_url($url);
    }

    /**
     * Former PHP 4-style constructor. PHP 8 no longer treats a method named
     * after the class as a constructor, so the real work lives in
     * __construct(); this method remains so that explicit calls such as
     * parent::Grabber(...) keep working. Parameters are as for __construct().
     */
    public function Grabber($url, $tmpdir = 'tmp/', $tmpfile = 'tmp.txt', $ifmod = 86400)
    {
        $this->content = self::fetch_url($url);
    }

    /**
     * Grabs a single piece of text: whatever lies between the first $start
     * and the following $end in $this->content. The result is stored in both
     * $this->text and $this->searchtxt.
     *
     * One extra byte directly after $start is skipped (historical behaviour,
     * kept because existing callers' markers may rely on it). If either
     * marker is missing, or $end is empty, the result is ''.
     *
     * @param string $start Marker preceding the wanted text.
     * @param string $end   Marker following the wanted text.
     */
    public function grab_unit($start, $end)
    {
        $this->text = self::text_before(self::text_after($this->content, $start, 1), $end);
        $this->searchtxt = $this->text;
    }

    /**
     * Grabs a list of pieces: first narrows $this->content to the text
     * between $start and $end (same rules as grab_unit(), including the one
     * extra skipped byte), then splits that text on every occurrence of
     * $delimstart or $delimend. The pieces are stored in $this->searchar.
     *
     * $delimstart and $delimend are inserted verbatim into a regular
     * expression delimited by '@', so regex metacharacters in them are
     * interpreted as such and a literal '@' must be escaped by the caller.
     * If the resulting pattern is invalid, $this->searchar becomes an empty
     * array.
     *
     * @param string $start      Marker preceding the region to split.
     * @param string $delimstart Regex fragment found in front of each piece.
     * @param string $delimend   Regex fragment found behind each piece.
     * @param string $end        Marker following the region to split.
     */
    public function grab_array($start, $delimstart, $delimend, $end)
    {
        $this->text = self::text_before(self::text_after($this->content, $start, 1), $end);

        $pieces = preg_split('@' . $delimstart . '|' . $delimend . '@', $this->text);
        // preg_split() returns false on a broken pattern; keep the property an array.
        $this->searchar = is_array($pieces) ? $pieces : array();
    }

    /**
     * Trims every element of $this->searchar down to the text between
     * $start and $end (no extra byte is skipped here).
     *
     * An element that does not contain $start, or does not contain a
     * non-empty $end, becomes ''. An empty $start keeps the element from its
     * beginning; an empty $end keeps it through to its end.
     *
     * @param string   $start      Marker preceding the wanted text.
     * @param string   $end        Marker following the wanted text, or '' for none.
     * @param int|bool $clear_html When truthy, strip_tags() is applied to each element.
     */
    public function refine_array($start, $end, $clear_html = 0)
    {
        if (!is_array($this->searchar)) {
            return;
        }

        foreach ($this->searchar as $key => $piece) {
            $piece = self::text_after($piece, $start);
            if (!empty($end)) {
                $piece = self::text_before($piece, $end);
            }
            if ($clear_html) {
                $piece = strip_tags($piece);
            }
            $this->searchar[$key] = $piece;
        }
    }

    /**
     * Deletes every occurrence of the given substrings from each element of
     * $this->searchar and from $this->searchtxt. The substrings are removed
     * one after another, in the order given.
     *
     * @param array|string $trash Substrings to delete (a single string is accepted too).
     */
    public function remove_trash($trash)
    {
        $needles = array_values((array) $trash);

        if (is_array($this->searchar)) {
            // str_replace() handles an array subject element by element, keys preserved.
            $this->searchar = str_replace($needles, '', $this->searchar);
        }
        $this->searchtxt = str_replace($needles, '', (string) $this->searchtxt);
    }

    /**
     * Stand-alone helper that does not touch any property: returns the part
     * of $word between the first $start and the following $end.
     *
     * Returns '' when $start or $end is not found, or when $end is empty.
     *
     * @param string   $start   Marker preceding the wanted text.
     * @param string   $end     Marker following the wanted text.
     * @param string   $word    Text to cut from.
     * @param int|bool $testvar Debug aid: when truthy, the script is terminated
     *                          with die(), printing the text found after $start.
     * @return string
     */
    public function cut($start, $end, $word, $testvar = 0)
    {
        $word = self::text_after($word, $start);
        if ($testvar) {
            die($word);
        }
        return self::text_before($word, $end);
    }

    /**
     * Sends an HTTP POST request and returns the response body.
     *
     * @param array|string $vars Form fields as key => value (URL-encoded here),
     *                           or an already encoded request body string.
     * @param string       $url  Address to post to.
     * @return string Response body, or the text
     *                "Error: Could not post to remote system." when the
     *                request failed or the response was empty.
     */
    public function send_post($vars, $url)
    {
        $failure = "Error: Could not post to remote system.";

        // Pass '&' explicitly so the body does not depend on arg_separator.output.
        $body = is_array($vars) ? http_build_query($vars, '', '&') : (string) $vars;

        $ch = curl_init();
        if ($ch === false) {
            return $failure;
        }
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
        $response = curl_exec($ch);
        curl_close($ch);

        if ($response === false || $response === '') {
            return $failure;
        }
        return $response;
    }

    /**
     * Downloads $url with a 60 second timeout and a browser-like user agent.
     *
     * @param string $url
     * @return string Response body, or '' on any failure.
     */
    private static function fetch_url($url)
    {
        $ch = curl_init();
        if ($ch === false) {
            return '';
        }
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_TIMEOUT, 60);
        curl_setopt(
            $ch,
            CURLOPT_USERAGENT,
            "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.6) Gecko/20040206 Firefox/0.8"
        );
        $body = curl_exec($ch);
        curl_close($ch);

        // curl_exec() yields false on failure; callers expect a string.
        return is_string($body) ? $body : '';
    }

    /**
     * Returns what follows the first occurrence of $start in $text, skipping
     * $skip additional bytes. An empty $start matches at the very beginning.
     * Returns '' when $start is absent or nothing is left after it.
     */
    private static function text_after($text, $start, $skip = 0)
    {
        $text = (string) $text;
        $start = (string) $start;

        $pos = 0;
        if ($start !== '') {
            $pos = strpos($text, $start);
            if ($pos === false) {
                return '';
            }
        }

        $offset = $pos + strlen($start) + $skip;
        if ($offset >= strlen($text)) {
            return '';
        }
        return (string) substr($text, $offset);
    }

    /**
     * Returns what precedes the first occurrence of $end in $text.
     * Returns '' when $end is empty or absent.
     */
    private static function text_before($text, $end)
    {
        $text = (string) $text;
        $end = (string) $end;
        if ($end === '') {
            return '';
        }

        $pos = strpos($text, $end);
        if ($pos === false) {
            return '';
        }
        return (string) substr($text, 0, $pos);
    }
}
?>