Thanks for the code Installer it has really helped me out. But I need it to search within the array:
$urls = array('http://www.google.com', 'http://google.com', 'www.google.de',
'google.com', 'http://example.org/', 'example/index.php',
'http://example.arpa/index.php?test=1',
'www.example.xyz', 'index.php', 'http://example/', '');
changed to:
$urls = ($linkarray[1][$i]);
so when I repaced all the test URLs with it, it gave an error, on line 154 -
foreach ($urls as $url) {
And because all your code needs to be within my FOR() loop, it gives a warning about already being called. Here's how i tried it:
<!-- Created by Lee Butcher AKA BRUm 27/7/06 -->
<html>
<head>
</head>
<body>
<FORM METHOD="POST" ACTION="<? $_SERVER['PHP_SELF']; ?>">
<INPUT TYPE="text" NAME="key1" VALUE="keyword1"><br>
<INPUT TYPE="text" NAME="key2" VALUE="keyword2"><br>
<INPUT TYPE="text" NAME="key3" VALUE="keyword3"><br>
<INPUT TYPE="text" NAME="key4" VALUE="keyword4"><br>
<INPUT TYPE="text" NAME="key5" VALUE="keyword5"><br>
<INPUT TYPE="submit" NAME="submit" VALUE="Search">
</FORM>
</body>
</html>
<?php
//Request variables
$key1 = $_REQUEST['key1'];
$key2 = $_REQUEST['key2'];
$key3 = $_REQUEST['key3'];
$key4 = $_REQUEST['key4'];
$key5 = $_REQUEST['key5'];
//Check whether form submitted. If so store google content
IF ($key1 == "keyword1" AND ($key2 == "keyword2" AND ($key3 == "keyword3" AND ($key4 == "keyword4" AND ($key5 == "keyword5")))))
{
ECHO "Form has not been submitted yet";
}ELSE{
$url = "http://www.google.co.uk/search?hl=en&q=$key1+$key2+$key3+$key4+$key5&btnG=Google+Search&meta=";
$content = file_get_contents("http://www.google.co.uk/search?hl=en&q=$key1+$key2+$key3+$key4+$key5&btnG=Google+Search&meta=");
//echo $content;
}
/*
//Retrieve links from first page
//$find = "\^\<a href='http:\/\/'%\[\^google\]%";
//$find = '<([A-Z][A-Z0-9]*)\b[^>]*>(.*?)</\1>';
IF (eregi("<([A-Z][A-Z0-9]*)\b[^>]*>(.*?)</\1>", $content, $results)) {
//IF ($pos === FALSE){
//IF (!eregi ('.*<.*href=".*">.*</a>', stripslashes(trim($content)))) {
echo "No links found containing $find";
exit;
}else{
echo $results;
//echo eregi ('.*<.*href=".*">.*</a>', stripslashes(trim($content)));
}
*/
function parse_links($url) {
# Zero or more whitespace characters
$S0 = '\s*';
# One or more whitespace characters
$S1 = '\s+';
# Anchor tag start
$anch1 = '<a' . $S1 . '.*';
#Space between
# href= pattern
$href1 = 'href' . $S0 . '=' . $S0;
# quoted strings, with selection
$q1 = "'[^']'";
$q2 = '"[^"]*"';
$q = "($q1|$q2)";
# full link pattern
$link_RE = "$anch1$S0$href1$q$S0>\s*(.*?)</a>";
//global $q, $href1, $link_RE;
preg_match_all("#$link_RE#i", $url, $matches);
return $matches; // returns an array
} // end function parse_links()
//
// DEMO OF HOW TO USE THE FUNCTION
// grab a webpage
$str = implode('',file($url));
// call the parse_links function
$linkarray=parse_links($str);
echo "Relevent links found to backlink:<br><br>";
$host = $linkarray[1][$i];
//parse_url($host);
$host = explode(".", $host);
//echo $host[0];
//echo $host[1];
//echo $host[2];
//echo $host[3];
echo "<br><br>";
// loop through the link array, outputting the URL + Link Text
for ($i = 0; $i < sizeof($linkarray[0]); $i++){
echo ($linkarray[2][$i] . " - <a href=" . $linkarray[1][$i] . "> Link " . $i . "</a><br>");
//$host = $linkarray[1][$i];
//parse_url($host);
//$host = explode(".", $host);
//echo $host[0];
//echo "$host[1]<br>";
//echo "$host[2]<br>";
//echo $host[3];
//Search array for domain and extension
//$try = "http://www.google.com";
//$domain = '^\^[[:alnum:]][a-z0-9_\.\-]+\.[a-z]{2,4}$^';
//$domain = '%^[[:alnum:]][a-z0-9_\.\-]+\.[a-z]{2,4}$%';
//$results = preg_match($domain, $try, $found);
//echo $found[0]."<br>";
function domain_substr($str, $loc = 'http://data.iana.org/TLD/tlds-alpha-by-domain.txt')
{
if (($tmp = strstr($str, '//')) !== false) {
$str = ltrim($tmp, '/');
}
$str = strtok($str, '/');
$arr = explode('.', $str);
$max_key = count($arr) - 1;
if ($max_key < 1) {
return false;
}
$list = file($loc);
foreach ($list as $key => $line) {
$list[$key] = trim($line);
}
if (!in_array(strtoupper($arr[$max_key]), $list)) {
return false;
}
$dom = $arr[$max_key - 1] . '.' . $arr[$max_key];
return $dom;
}
$urls = ($linkarray[1][$i]);
foreach ($urls as $url) {
echo $url . ' => ';
if ($domain = domain_substr($url)) {
echo $domain . '<br />';
} else {
echo 'false' . '<br />';
}
}
//preg_match('/^[[:alnum:][^.]+\.[^.]]+$/', $linkarray[1][$i], $found);
//echo $found[0]."<br>";
/*
//Expressions to split URLs
$start = 'http://';
$www = 'www.';
$domain = '[[:alnum:]]';
$extension = '\.[[:alnum:]]+';
//Full pattern
$pattern = "$start$www?$domain$extension";
//Search
preg_match_all("#$pattern#i", $linkarray[1][$i], $matches1);
return $matches2;
*/
}
echo "<br><br>";
//process again but with emails
for($o = 0; $o < sizeof($linkarray[0]); $o++){
$emailf = $linkarray[1][$o];
function parse_emails($emailf) {
//splt URLs
FOR ($x = 0; $x < sizeof($linkarray[0]); $x++)
$purl = $linkarray[1][$x];
print_r(parse_url($linkarray[1][$x]));
# Zero or more whitespace characters
$S0 = '\s*';
# One or more whitespace characters
$S1 = '\s+';
# Anchor tag start
$anch1 = '<a' . $S1 . '.*';
#Space between
# href= pattern
$href1 = 'href' . $S0 . '=' . $S0;
# quoted strings, with selection
$q1 = "'[^']'";
$q2 = '"[^"]*"';
$q = "($q1|$q2)";
# full link pattern
$link_RE = "$anch1$S0$href1$q$S0>\s*(.*?)</a>";
//global $q, $href1, $link_RE;
preg_match_all("#$link_RE#i", $linkarray[1][$o], $matches);
return $matches; // returns an array
} // end function parse_links()
//
// DEMO OF HOW TO USE THE FUNCTION
// grab a webpage
if (($tmp_content = @file($emailf))) {
$str = implode('',$tmp_content);
} else die("File {$emailf} does not exists!");
// call the parse_links function
$linkarray=parse_emails($str);
echo "Relevent links found to backlink:<br><br>";
// loop through the link array, outputting the URL + Link Text
for ($i = 0; $i < sizeof($linkarray[0]); $i++){
echo ($linkarray[2][$o] . " - <a href=" . $linkarray[1][$o] . "> Link " . $o . "</a><br>");
}
echo "<br><br>";
}
//splt URLs
FOR ($x = 0; $x < sizeof($linkarray[0]); $x++)
print_r(parse_url($linkarray[1][$x]));
?>
Thanks again.