Okay, so after looking over it, I seem to have come up with this:
<?php
# Define a few vars:
// When $debug is true a sample Google referrer is injected so the page can be
// exercised without actually arriving from a search engine.
$debug = true;
if ($debug == true) {
  $_SERVER['HTTP_REFERER'] = 'http://www.google.com/search?hl=en&q=a+test+search+from+google&btnG=Google+Search';
}
$char = false; // Change to Charset as needed
################################
// The Referer header is optional, so fall back to an empty string rather than
// reading an undefined index.
$ref = isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
$pref = bbc_parse_ref($ref);
$keywords = bbc_get_keywords($ref, $char);
// bbc_parse_ref() returns false or -1 instead of an array for referrers it
// cannot use; only index the result when it really is an array. The host comes
// from a client-supplied header, so escape it before echoing it into HTML.
$host = is_array($pref) ? htmlspecialchars($pref[0], ENT_QUOTES) : '';
echo 'Hi there. You entered this site from '.$host.' with the keywords:<br />';
// The keywords have already been passed through bbc_clean() by bbc_get_keywords().
if ($keywords !== false) {
  foreach ($keywords as $word) {
    echo $word.'<br>';
  }
}
/*** From here down is straight from bbclone ***/
// Converts the HTML entities that commonly show up in recorded referrers back
// into their literal characters.
//
// NOTE(review): the entity names below were reconstructed from their
// replacement values, because the table had been entity-decoded into no-ops
// (and a syntax error for the double quote). Both the named and the numeric
// spelling are accepted for the non-breaking space and the apostrophe.
//
// strtr() with an array replaces in a single pass and never rescans its own
// output, so "&amp;lt;" becomes "&lt;" and not "<".
//
// @param STRING $str text that may contain HTML entities
// @return STRING
function bbc_specialchars_decode($str) {
  $trans = array(
    "&amp;" => "&",
    "&nbsp;" => " ",
    "&#160;" => " ",
    "&quot;" => "\"",
    "&#039;" => "'",
    "&#39;" => "'",
    "&lt;" => "<",
    "&gt;" => ">"
  );
  return strtr($str, $trans);
}
// Picks the search term out of a list of "name=value" query variables.
//
// Each variable is matched against a list of parameter-name patterns used by
// search engines. A term found behind a parameter starting with "q" or "s" is
// returned immediately; otherwise the term of the last variable that yielded a
// usable match is returned. Terms shorter than two characters are ignored.
//
// @param ARRAY $array query variables, one "name=value" string per element
// @return STRING | FALSE the raw search term, or false when none was found
function bbc_get_search($array) {
  $result = false;
  $query = array(
    "^as_(ep|o|e)?q=",
    "^q(_(a(ll|ny)|phrase|not)|s|t|u(ery)?)?=",
    "^s(u|2f|p\-q|earch(_?for)?|tring|zukaj)?=",
    "^k(w|e(reses|y(word)?s?))=",
    "^b(egriff|uscar?)=",
    "^w(d|ords?)?=",
    "^te(rms?|xt)=",
    "^mi?t=",
    "^heureka=",
    "^p=",
    "^r(eq)?=",
    "/search/web/",
    "^userQuery=",
    "^v[aeop]="
  );

  foreach ($array as $string) {
    $string = rawurldecode($string);
    // skip empty GET variables
    if (substr($string, -1) == "=") continue;

    // Track the usable term per variable. Previously the match state leaked
    // across iterations, so a too-short term (or a stale one from an earlier
    // variable) could overwrite a valid result.
    $found = null;
    foreach ($query as $key) {
      if (!preg_match("%$key%", $string, $matches)) continue;

      $par = $matches[0];
      // the term is everything following the matched parameter name
      $term = substr($string, strpos($string, $par) + strlen($par));
      $length = defined("_BBC_MBSTRING") ? mb_strlen($term) : strlen($term);
      if ($length < 2) continue;

      // "q..." and "s..." parameters take precedence over everything else
      if (($par[0] == "q") || ($par[0] == "s")) return $term;
      $found = $term;
    }
    if ($found !== null) $result = $found;
  }
  return $result;
}
// Extracts the individual search keywords from a referrer URL.
//
// Returns false when the referrer contains none of the known search engine
// markers, when no search term can be located in it, or when no keyword
// survives the final filtering.
//
// @param STRING $ref the referrer URL
// @param STRING | FALSE $char target charset; when false the keywords are lowercased
// @return ARRAY | FALSE
function bbc_get_keywords($ref, $char) {
  $engines = array(
    "ara", "busca", "pesquis", "search", "srch", "seek", "zoek", "result", "szuka", "cherch", "such", "find",
    "trouve", "trova", "pursuit", "keres", "katalogus", "alltheinternet.com", "mamma.com", "baidu.com", "heureka.hu",
    "kartoo.com", "ask.com", "aport.ru", "google", "yahoo"
  );
  $param_seps = array("&", "|");
  $keyword_seps = array("+", "-", " ", "/");

  // Give up early unless the referrer looks like it came from a search engine
  $haystack = strtolower($ref);
  $is_search = false;
  foreach ($engines as $marker) {
    if (strpos($haystack, $marker) !== false) {
      $is_search = true;
      break;
    }
  }
  if (!$is_search) return false;

  // Keep only the part after the last "?", or everything after the scheme
  // separator when the URL carries no query string
  $ref = bbc_specialchars_decode(rawurldecode($ref));
  $mark = strrpos($ref, "?");
  if ($mark !== false) {
    $ref = substr($ref, $mark + 1);
  } else {
    $ref = substr($ref, (strpos($ref, "://") + 3));
  }

  $raw_search = bbc_get_search(bbc_get_sep($ref, $param_seps));
  if ($raw_search === false) return false;

  // Conversion of keywords, if applicable
  $from = false;
  if (defined("_BBC_MBSTRING")) $from = bbc_get_encoding($raw_search);
  if (($from !== false) || defined("_BBC_RECODE")) {
    $raw_search = bbc_convert_lang($raw_search, $from, $char);
  }

  $words = bbc_get_sep($raw_search, $keyword_seps);
  if ($char) mb_internal_encoding($char);

  $result = array();
  foreach ($words as $word) {
    $tmp = bbc_clean($word);
    if (!$char) $tmp = strtolower($tmp);

    // Filter search engine cache indicator
    if (preg_match("%^(cache|tbn|link)\:[\w\-]{8,}%", $tmp)) continue;

    // Drop anything longer than 50 characters
    $length = defined("_BBC_MBSTRING") ? mb_strlen($tmp) : strlen($tmp);
    if ($length < 51) $result[] = $tmp;
  }
  return (!empty($result) ? $result : false);
}
// Splits a referrer URL into its host (with a trailing slash), the "www."
// counterpart of that host, and the path including any query string.
//
// The counterpart is the host without its leading "www." label when it has
// one, and the host prefixed with "www." otherwise.
//
// @param STRING $ref the referrer URL
// @return ARRAY | FALSE | INT array(host, counterpart host, path) on success,
//         -1 for the legacy "ignored" marker, false when the URL has no scheme
//         or no host
function bbc_parse_ref($ref) {
  // do nothing in case some old "ignored" entries survived an update
  if ($ref === "ignored") return -1;

  $ref_array = parse_url($ref);
  if (!is_array($ref_array) || !isset($ref_array['scheme'])) return false;
  // URLs such as "mailto:..." have a scheme but no host to report
  if (!isset($ref_array['host']) || ($ref_array['host'] === "")) return false;

  // compare whether we got a "www.*" equivalent recorded (or missing)
  $old_host = $ref_array['host']."/";
  $dot = strpos($old_host, ".");
  if (($dot !== false) && (substr($old_host, 0, $dot) == "www")) {
    $new_host = substr($old_host, $dot + 1);
  } else {
    // also covers hosts without any dot, e.g. "localhost"
    $new_host = "www.".$old_host;
  }

  $path = !isset($ref_array['path']) ? "/" : $ref_array['path'];
  $path = isset($ref_array['query']) ? $path."?".$ref_array['query'] : $path;
  return array($old_host, $new_host, $path);
}
// Breaks a query string into a list of pieces.
//
// The separators are tried in the given order and the first one that occurs in
// the query is used for splitting. When none occurs, the query is returned as
// a single piece. Every piece is stripped of leading and trailing non-word
// characters, and pieces that end up empty are dropped.
//
// @param STRING $query the string to split
// @param ARRAY $array candidate separators, in order of preference
// @return ARRAY
function bbc_get_sep($query, $array) {
  foreach ($array as $separator) {
    $found = (strpos($query, $separator) !== false);
    $pieces = $found ? explode($separator, $query) : array($query);

    $pool = array();
    foreach ($pieces as $piece) {
      // Characters which usually aren't needed at the beginning or end of a keyword
      $piece = preg_replace("%^\W*(\b.{2,}\b)\W*$%", "\\1", $piece);
      if (!empty($piece)) {
        $pool[] = $piece;
      }
    }

    if ($found) {
      return $pool;
    }
  }
  // no separator occurred: hand back the cleaned, unsplit query
  return array_values($pool);
}
// Sanitises a string for storage and display: optionally cuts it at a
// separator, strips tags, turns backslashes into forward slashes, escapes HTML
// special characters and "$", and limits the length.
//
// NOTE(review): the "$" replacement had been entity-decoded into a no-op
// ("$" => "$"); "&#36;" is the reconstructed original - confirm against the
// bbclone sources.
//
// @param STRING $input the raw text
// @param STRING $sep optional separator; everything from its first occurrence
//        onwards is discarded. The default (0) disables the cut.
// @return STRING
function bbc_clean($input, $sep = 0) {
  // only look for separator if really needed; this also avoids handing the
  // integer default to strpos(), whose handling of non-string needles differs
  // between PHP versions
  if (!empty($sep)) {
    $sp = strpos($input, $sep);
    if ($sp !== false) $input = substr($input, 0, $sp);
  }
  $input = str_replace("\\", "/", strip_tags($input));
  $input = str_replace("$", "&#36;", htmlspecialchars($input, ENT_QUOTES));
  // Limit the maximum length to 1024 chars
  return trim(substr($input, 0, 1024));
}
?>
Now, browsing to this page outputs the following:
Hi there. You entered this site from www.google.com/ with the keywords:
a
test
search
from
google