I need to check a supplier's website for stock availability and pricing, but this requires a login.
To get around this, I am attempting to use cURL to log in and then request the product page, so that I can scrape the availability, price, etc.
The issue I have is that the login appears not to work. I get the error:
Warning: curl_setopt_array() [function.curl-setopt-array]: Unable to access cookies_path.txt in (path to folder specified)
I am using a class obtained from another forum which has good feedback and therefore should work.
Full code is as follows:
/**
 * Small cURL-based crawler for pages that sit behind a form login.
 *
 * Usage: construct, call login() once, then call get() for each page.
 * The session is carried between requests through a cookie-jar file that
 * is written when one cURL handle is closed and read by the next one.
 */
class Secure_Crawler {
    /** @var string URL the login form posts to. */
    private $loginUrl = 'this is the page the login form posts to';

    /** @var array cURL options shared by every request. */
    private $options = array( );

    /** @var bool True once login() has completed a request without a transport error. */
    private $connected = false;

    /**
     * Prepares the shared cURL options and removes any cookie jar left over
     * from a previous run so each crawl starts with a fresh session.
     */
    public function __construct () {
        // Use an absolute path for the cookie jar. A bare file name is
        // resolved against the process's working directory, which is not
        // necessarily this script's directory, and is a common cause of the
        // "Unable to access cookies_path.txt" warning from curl_setopt_array().
        $cookies = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'cookies_path.txt';

        $this->options = array(
            CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)',
            CURLOPT_HEADER         => false, // Set to true to include response headers, for testing
            CURLOPT_RETURNTRANSFER => true,  // Return the body from curl_exec() instead of printing it
            CURLOPT_COOKIEJAR      => $cookies, // Cookies are written here when a handle is closed
            CURLOPT_COOKIEFILE     => $cookies, // ...and read back from here by the next handle
        );

        // Reset cookies. Only delete when the file exists, so that a real
        // failure (e.g. missing permissions) is reported instead of hidden.
        if (is_file($cookies)) {
            unlink($cookies);
        }
    }

    /**
     * Posts the credentials to the login URL and stores the session cookies.
     *
     * Only the transport result is checked: the response body is not
     * inspected, so this cannot tell whether the site accepted the
     * credentials.
     *
     * @param string $username Value sent as the 'login.login' form field.
     * @param string $password Value sent as the 'login.password' form field.
     * @return void
     */
    public function login ($username, $password) {
        $ch = curl_init();

        $options = $this->options;
        $options[CURLOPT_URL] = $this->loginUrl;
        $options[CURLOPT_POST] = true;
        // Login form fields
        $options[CURLOPT_POSTFIELDS] = $this->getPostFields(
            array(
                'login.login'     => $username,
                'login.password'  => $password,
                'smalllogin.path' => '/main/en/page.html'
            ));
        $options[CURLOPT_FOLLOWLOCATION] = false;

        curl_setopt_array($ch, $options);
        $response = curl_exec($ch);

        // Closing the handle is what flushes the cookies to CURLOPT_COOKIEJAR.
        curl_close($ch);

        // curl_exec() returns false on a transport failure; an empty body
        // (e.g. a redirect that is not followed) is still a completed request.
        $this->connected = ($response !== false);
    }

    /**
     * Fetches a page using the cookies stored by login().
     *
     * @param string $url Address of the page to request.
     * @return string|false Response body, or false if the request failed.
     * @throws Exception When called before a login() request has completed.
     */
    public function get ($url) {
        if (!$this->connected) {
            throw new Exception("Not connected");
        }

        $ch = curl_init();

        $options = $this->options;
        $options[CURLOPT_URL] = $url;

        curl_setopt_array($ch, $options);
        $results = curl_exec($ch);

        // Close curl session
        curl_close($ch);

        return $results;
    }

    /**
     * Builds an application/x-www-form-urlencoded request body.
     *
     * Keys and values are URL-encoded so that characters such as '&', '='
     * and '%' in a credential cannot break up or alter the request body.
     *
     * @param array $data Map of form field name to value.
     * @return string Encoded 'key=value' pairs joined with '&'.
     */
    private function getPostFields ($data) {
        $pairs = array();
        foreach ($data as $key => $field) {
            $pairs[] = urlencode((string) $key) . '=' . urlencode((string) $field);
        }
        return implode('&', $pairs);
    }
}
// Driver script: authenticate against the supplier site, fetch the
// product page with the resulting session and print it.
$username   = 'my username';
$password   = 'my password';
$productUrl = 'this is the product page';

$crawler = new Secure_Crawler();

// Establish the session; its cookies are reused by the request below.
$crawler->login($username, $password);

// Retrieve the page that requires the logged-in session.
$content = $crawler->get($productUrl);

// modifications...

echo $content;
I had assumed there might be an issue writing the cookies_path.txt file to the directory. However, I think I have the correct permissions, and it appears that the file is being created but then deleted. In any case, the script appears to be unable to read the file in order to perform the login.