I'm strugling to parse this exotic, nested file that isn't XML.
here's a sample
F(
T(22)
p(
X(401) Y(3) W(15) F(0)
L(2)
s(
T(n() C(1) t(1))
Folders()
c(
I built a character-by-character scanner that works well enough:
$limit=strlen($file);
echo "Begin token ingestion. ($limit)\n";
for ($i=0;$i<=$limit;$i++){
$chr=substr($file,$i,1);
if ($chr=='('){
$lvl++;
$numTok++;
$tokens[$numTok]= new tok($numTok);
$tokens[$numTok]->id=$numTok;
$tokensId[$lvl]=$numTok;
$tokensStart[$lvl]=$i+1;
$prev=substr($file,0,$i);
$st1=strrpos($prev,"\n");
$st2=strrpos($prev,"(");
$st3=strrpos($prev,")");
if ($st2>$st1){
$start=$st2+1;
} else {
$start=$st1+1;
}
if ($st3>$start){
$start=$st3+1;
}
$len=$i-$start;
$tmpText=substr($file,$start,$len);
$tmpText=str_replace(" ","",$tmpText);
$tmpText=str_replace("\t","",$tmpText);
$tmpText=str_replace("\n","",$tmpText);
$tmpLabel[$lvl]=$tmpText;
if ($tmpLabel[$lvl]=='Group'){
$numGrps++;
$grp=$numGrps;
array_push($grpList,$grp);
}
}
if ($chr==')'){
$stop=$i-$tokensStart[$lvl];
$num=$tokensId[$lvl];
$tokens[$num]->label=$tmpLabel[$lvl];
$tmpStr=substr($file,$tokensStart[$lvl],$stop);
$tokens[$num]->val=$tmpStr;
$tokens[$num]->lvl=$lvl;
$tokens[$num]->grp=$grp;
$prev=$lvl-1;
$tokens[$num]->parent=$tokensId[$prev];
if ($tmpLabel[$lvl]=='Group'){
array_pop($grpList);
$tokens[$num]->grpSub=$grp;
$lastGrp=$grpList[(count($grpList)-1)];
$grp=$lastGrp;
$tokens[$num]->grp=$lastGrp;
echo "lastgrp= $lastGrp\n";
}
$lvl--;
}
}
... but for longer files it really chokes up. Folks tell me I should be able to use classes and regular expressions to type the file into a linked list, but I can't find anything on the web about how to do that.
update: I've tried using preg_match_all to grab items delimited by '(' and ')' but it doesn't do the trick since I end up with a non-nested mess.