Alright, this is my latest version, also a failure. I tried a while loop:
$site_domain = 'https://wheeltastic.com';
$tocrawl_array[0] = '/';
$crawled_array = array();
function is_image($path){
$a = getimagesize($path);
$image_type = $a[2];
if(in_array($image_type , array(IMAGETYPE_GIF , IMAGETYPE_JPEG ,IMAGETYPE_PNG , IMAGETYPE_BMP))){
return true;
}
return false;
}
$options = array('http' => array('user_agent' => 'Wheelie-Bot / Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'));
$context = stream_context_create($options);
$cd = 0;
$tc = 0;
while(array_key_exists($tc, $tocrawl_array)){
echo $tc;
$crawl = $tocrawl_array[$tc];
if(!in_array($crawl, $crawled_array)){
$html = file_get_contents($site_domain.$crawl, false, $context);
$crawled_array[$cd] = $crawl;
$cd = ++$cd;
$dom = new DOMDocument();
@$dom->loadHTML($html);
// grab all the links on the page
$xpath = new DOMXPath($dom);
$hrefs = $xpath->evaluate("/html/body//a");
for ($i = 0; $i < $hrefs->length; $i++) {
$href = $hrefs->item($i);
$url = $href->getAttribute('href');
if(!@is_image('.'.$url) AND substr( $url, 0, 1 ) === "/" AND !in_array($url, $crawled_array) AND !in_array($url, $tocrawl_array)){
echo $url.'<br />';
$tn = ++$tc;
$tocrawl_array[$tn] = $url;
}
}
}
$tc = ++$tc;
// Stop it early for testing
if($tc > 100){
exit();
}
}
print_r($tocrawl_array);
At the bottom, i printed the array to make sure that the next array key existed and it does.
/>Array
(
[0] => /
[1] => /?action=cart
[2] => /?action=about
[3] => /?action=contact
[4] => /category/accessories
[5] => /category/lights
[6] => /category/mounts
[7] => /category/wheel-accessories
[8] => /category/wheels
[9] => /category/winches
[10] => /brand/method-race-wheels
[11] => /brand/visionx
[12] => /brand/baja-designs
[13] => /brand/superwinch
[14] => /brand/mile-marker
[15] => /?action=my_stuff
[16] => /?action=my_orders
[17] => /?action=privacy
[18] => /?action=tos
)
So although my intention was to have it go to $tocrawl_array[1], it seems to refuse to do so and stopped after the first iteration of the loop.
I just don't know what else to try. I've exhausted my limited knowledge in matters such as these and will patiently wait for someone to point me in the correct direction 🙂