I have an MS Word file or a MATLAB file, and I am trying to extract the images embedded in that file using string functions. The images can be JPG, GIF, BMP, PNG or TIF. I know something about image headers; for example, ffd8 is the first two bytes that a JPG image starts with.
this is my code:
<?php
// hex2bin() has been a PHP built-in since 5.4; redeclaring it is a fatal error,
// so this fallback is only defined on runtimes that lack the native function.
if (!function_exists('hex2bin')) {
    /**
     * Convert a hexadecimal string into the raw bytes it encodes.
     *
     * @param mixed $h Hex digits, two per output byte (e.g. "ffd8").
     * @return string|null The decoded bytes, or null when $h is not a string.
     */
    function hex2bin($h)
    {
        if (!is_string($h)) {
            return null;
        }

        $r = '';
        $length = strlen($h);
        // Walk the input two hex digits at a time. substr() replaces the
        // `$h{$a}` curly-brace offsets, which no longer parse on PHP 8.
        for ($a = 0; $a < $length; $a += 2) {
            $r .= chr(hexdec(substr($h, $a, 2)));
        }

        return $r;
    }
}
/**
 * Save one candidate image to disk as "<i>.<name>" in the current directory.
 *
 * Reads the globals $data (hex-encoded file contents), $pos1 and $pos2
 * (start and end offsets into $data, counted in hex digits), decodes that
 * slice back to bytes and writes it out.
 *
 * @param int|string $i    Prefix of the output file name (the image counter).
 * @param string     $name Extension of the output file name, e.g. "jpg".
 */
function print_photo($i, $name)
{
    global $data;
    global $pos1, $pos2;

    // Cut the hex slice for this image and turn it back into binary data.
    $content = hex2bin(substr($data, $pos1, $pos2 - $pos1));
    $file_name = $i . '.' . $name;

    // 'wb': image data is binary, so never allow text-mode translation.
    $file = fopen($file_name, 'wb');
    if ($file === false) {
        // fopen() has already raised a warning; skip this image instead of
        // passing false on to fwrite()/fclose().
        return;
    }
    fwrite($file, $content);
    fclose($file);
}
// ---------------------------------------------------------------------------
// Scan test.doc for JPEG start-of-image markers and dump one candidate image
// per marker (from the marker to the end of the file) via print_photo().
// ---------------------------------------------------------------------------
$i = 0;

// Read the whole input file as binary; stop early if it cannot be read
// instead of looping on an invalid file handle.
$raw = file_get_contents('test.doc');
if ($raw === false) {
    exit("Cannot read test.doc\n");
}

// print_photo() expects $data to be the hex dump of the file.
$data = bin2hex($raw);
unset($raw);

$type = 'ffd8';     // hex form of the first two bytes of a JPEG
$array1 = array();  // hex offsets in $data where $type was found
$start = 0;

// Collect every byte-aligned occurrence of the marker. A hit at an odd hex
// offset straddles two different bytes (e.g. bytes 1F FD 80 contain "ffd8"
// at offset 1) and is not a real FF D8 sequence, so it is skipped.
while (($pos1 = strpos($data, $type, $start)) !== false) {
    if ($pos1 % 2 === 0) {
        $array1[] = $pos1;
        $start = $pos1 + strlen($type);
    } else {
        $start = $pos1 + 1;
    }
}

$y = strlen($data);
foreach ($array1 as $x) { // for each position found
    // print_photo() reads the slice boundaries from these globals.
    $pos1 = $x;
    $pos2 = $y; // the end of the image is not detected: copy up to end of file
    print_photo($i, 'jpg'); // write the possible photo
    $i++;
}
?>
The code above works for an MS Word file as input, but it doesn't work for the files that I have placed at the address below. Could somebody revise my code?
thanks.
Please extract the files using WinRAR or 7-Zip first.
http://aladdinmobile.com/files.tar.gz