Hey everyone,
I've been working on a project which retrieves a page with PHP and cURL.
Currently it accepts GET variables and, supposedly, POST variables.
When I request a Google page and try to send some nonexistent POST variable, it gives a "501 - Not Implemented" error.
Here is what I have so far:
lexbot.php:
<?php
// Driver script: fetch a Google search page with POST enabled and print it.
include 'class_lexbot.php';

$bot = new LexBot();

// Transfer options.
$bot->setTimeout(30);
$bot->setBinary(false);
$bot->setUnicode(true);
$bot->setPost(true);

// Request variables. The GET value is appended to the query string as-is,
// which is why its spaces are already written as "+".
$bot->newGetVar("q", "some+search+string");
$bot->newPostVar("n", "a+random_post_var");
$bot->newPostVar("d", "another_random_var");

// Perform the request and output the body that was retrieved.
$bot->fetchPage("http://www.google.ca/search");
print $bot->getHtml();
?>
class_lexbot.php:
<?php
/**
 * Small cURL wrapper that fetches a page, optionally sending GET and POST
 * variables, and keeps both the raw body and a UTF-8 version of it.
 *
 * Typical use: configure with the set*() / new*Var() methods, call
 * fetchPage($url), then read the result with getHtml().
 */
class LexBot
{
    private $cUrl;    // cURL handle used by the most recent fetchPage() call
    private $timeout; // Transfer timeout, in seconds
    private $post;    // Send the collected POST variables?
    private $html;    // Raw response body
    private $eHtml;   // Response body as UTF-8
    private $sendUni; // Should getHtml() return the UTF-8 body?
    private $binary;  // 1 for binary transfers, 0 otherwise
    private $url;     // Requested URL (including the query string once fetched)
    private $gVar;    // GET variables, as a ready-made query string
    private $pVar;    // POST variables (name => value array)
    private $fPVar;   // POST variables formatted as a request body
    private $uAgent;  // User-Agent header sent with every request

    /**
     * Initialise every setting to its default.
     *
     * Uses __construct() rather than a method named after the class: PHP 8 no
     * longer treats the latter as a constructor, which would leave all of the
     * defaults below unset.
     */
    public function __construct()
    {
        $this->cUrl    = null;
        $this->timeout = 30;
        $this->post    = false;
        $this->html    = "";
        $this->eHtml   = "";
        $this->sendUni = true;
        $this->binary  = 0;
        $this->url     = "";
        $this->gVar    = "";
        $this->pVar    = array();
        $this->fPVar   = "";
        $this->uAgent  = "Mozilla/4.0 LexBot/1.0";
    }

    /**
     * Read a private property from outside the class.
     * ie. $lexBot->get("some_var");
     *
     * @param string $var Property name.
     * @return mixed Current value of the property.
     */
    public function get($var)
    {
        return $this->{$var};
    }

    /**
     * Overwrite a private property from outside the class.
     * ie. $lexBot->set("some_var", "value");
     *
     * @param string $var   Property name.
     * @param mixed  $value New value.
     * @return bool Always true.
     */
    public function set($var, $value)
    {
        $this->{$var} = $value;
        return true;
    }

    /**
     * Append a string to a property (string concatenation).
     *
     * @param string $var   Property name.
     * @param string $value Text to append.
     * @return bool Always true.
     */
    public function appendSet($var, $value)
    {
        $this->{$var} .= $value;
        return true;
    }

    /**
     * Store a value under a key in an array property.
     *
     * @param string $var   Property name.
     * @param string $name  Array key.
     * @param mixed  $value Value stored under that key.
     * @return bool Always true.
     */
    public function appendArray($var, $name, $value)
    {
        $this->{$var}[$name] = $value;
        return true;
    }

    /**
     * Return the body of the last fetched page: the UTF-8 version when
     * Unicode output is enabled, the raw body otherwise.
     *
     * @return string
     */
    public function getHtml()
    {
        return $this->sendUni ? $this->eHtml : $this->html;
    }

    /**
     * Add a GET variable to the query string.
     *
     * The name and value are appended as given (no URL-encoding is applied),
     * so callers must pass them already encoded. Pairs are joined with "&".
     *
     * @param string $name  Variable name.
     * @param string $value Variable value, already URL-encoded.
     * @return void
     */
    public function newGetVar($name, $value)
    {
        $pair = $name . "=" . $value;
        // Without the separator a second variable would run into the first
        // one ("q=an=b" instead of "q=a&n=b").
        if ($this->gVar !== null && $this->gVar !== '') {
            $pair = "&" . $pair;
        }
        $this->gVar .= $pair;
    }

    /**
     * Add a POST variable. Values are encoded by http_build_query() when the
     * request is sent, so they are passed here unencoded.
     *
     * @param string $name  Variable name.
     * @param string $value Variable value.
     * @return void
     */
    public function newPostVar($name, $value)
    {
        $this->pVar[$name] = $value;
    }

    /**
     * Flag the transfer as binary (stored as 1) or not (stored as 0).
     *
     * @param mixed $value Anything loosely equal to true enables the flag.
     * @return bool Always true.
     */
    public function setBinary($value)
    {
        $this->binary = ($value == true ? 1 : 0);
        return true;
    }

    /**
     * Set the transfer timeout; '' or null keeps the current value.
     *
     * @param int $value Timeout in seconds.
     * @return bool Always true.
     */
    public function setTimeout($value)
    {
        $this->timeout = $this->valueOrCurrent($value, $this->timeout);
        return true;
    }

    /**
     * Enable or disable sending the POST variables; '' or null keeps the
     * current value.
     *
     * @param bool $value
     * @return bool Always true.
     */
    public function setPost($value)
    {
        $this->post = $this->valueOrCurrent($value, $this->post);
        return true;
    }

    /**
     * Choose whether getHtml() returns the UTF-8 body; '' or null keeps the
     * current value.
     *
     * @param bool $value
     * @return bool Always true.
     */
    public function setUnicode($value)
    {
        $this->sendUni = $this->valueOrCurrent($value, $this->sendUni);
        return true;
    }

    /**
     * Return every substring of the raw body that starts with $bTag and ends
     * with $eTag (shortest match, case-insensitive, across newlines).
     *
     * Both arguments are inserted into the pattern unescaped, so they are
     * interpreted as regular-expression fragments.
     *
     * @param string $bTag Opening fragment.
     * @param string $eTag Closing fragment.
     * @return array List of matched strings; empty when nothing matches.
     */
    public function parseNRetTags($bTag, $eTag)
    {
        $matchedData = array();
        preg_match_all("(" . $bTag . ".*" . $eTag . ")siU", $this->html, $matchedData);
        return isset($matchedData[0]) ? $matchedData[0] : array();
    }

    /**
     * Fetch $url, appending the collected GET variables and, when POST is
     * enabled and POST variables exist, sending them as the request body.
     *
     * On success the raw body and its UTF-8 version are stored for
     * getHtml(). On failure both are reset to the empty string.
     *
     * @param string $url Address to request.
     * @return bool True when a body was retrieved, false otherwise.
     */
    public function fetchPage($url)
    {
        $this->url = $url;
        if ($url === null || $url === '') {
            return false;
        }

        // Attach the query string, if there is one.
        if ($this->gVar !== null && $this->gVar !== '') {
            // Use "&" when the URL already carries a query string.
            $separator = (strpos($this->url, "?") === false) ? "?" : "&";
            $this->url .= $separator . $this->gVar;
        }

        // Start our cURL instance.
        $handle = curl_init();
        $this->cUrl = $handle;
        if ($handle === false) {
            $this->html  = "";
            $this->eHtml = "";
            return false;
        }

        // Return the data instead of printing it.
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        // URL to fetch.
        curl_setopt($handle, CURLOPT_URL, $this->url);
        // Give up after the configured number of seconds.
        curl_setopt($handle, CURLOPT_TIMEOUT, (int) $this->timeout);
        // User agent.
        curl_setopt($handle, CURLOPT_USERAGENT, $this->uAgent);
        // A relatively 'safe' referrer.
        curl_setopt($handle, CURLOPT_REFERER, "http://www.google.com/?q=cool+websites");
        // Do not include response headers in the body.
        curl_setopt($handle, CURLOPT_HEADER, 0);
        // Authentication method (we go with any).
        curl_setopt($handle, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
        // SECURITY NOTE: certificate and host verification are switched off,
        // as in the original code; HTTPS responses are therefore not
        // authenticated. Enable both before fetching anything sensitive.
        curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($handle, CURLOPT_SSL_VERIFYHOST, 0);
        // Follow any redirects.
        curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
        // Is binary data?
        curl_setopt($handle, CURLOPT_BINARYTRANSFER, $this->binary);

        // POST form data only when asked to and there is something to send.
        $sendPost = $this->post && is_array($this->pVar) && count($this->pVar) > 0;
        if ($sendPost) {
            $this->fPVar = http_build_query($this->pVar);
            curl_setopt($handle, CURLOPT_POST, 1);
            curl_setopt($handle, CURLOPT_POSTFIELDS, $this->fPVar);
        } else {
            curl_setopt($handle, CURLOPT_POST, 0);
        }

        // Now time to get content!
        $body = curl_exec($handle);
        // Close connection.
        curl_close($handle);

        if ($body === false) {
            $this->html  = "";
            $this->eHtml = "";
            return false;
        }

        $this->html  = $body;
        $this->eHtml = $this->toUtf8($body);
        return true;
    }

    /**
     * Return $value unless it is '' or null, in which case return $current.
     */
    private function valueOrCurrent($value, $current)
    {
        return ($value !== '' && $value !== null) ? $value : $current;
    }

    /**
     * Return $raw as UTF-8. Text that is already valid UTF-8 is returned
     * untouched; anything else is treated as ISO-8859-1 and converted, which
     * is what utf8_encode() does.
     */
    private function toUtf8($raw)
    {
        // An empty pattern with the "u" modifier only matches when the
        // subject is valid UTF-8, so this avoids double-encoding such pages.
        if (preg_match('//u', $raw) === 1) {
            return $raw;
        }
        return utf8_encode($raw);
    }
}
?>
It does work, however, if I set POST to off:
<?php
// Driver script: same request as before, but with POST switched off.
include 'class_lexbot.php';

$bot = new LexBot();

// Transfer options.
$bot->setTimeout(30);
$bot->setBinary(false);
$bot->setUnicode(true);
$bot->setPost(false);

// Request variables. The GET value is appended to the query string as-is,
// which is why its spaces are already written as "+".
$bot->newGetVar("q", "some+search+string");
$bot->newPostVar("n", "a+random_post_var");
$bot->newPostVar("d", "another_random_var");

// Perform the request and output the body that was retrieved.
$bot->fetchPage("http://www.google.ca/search");
print $bot->getHtml();
?>
I would appreciate any help.
Thanks.