thanks, my slacker friend.
i just today discovered 'nested sets' or more specifically, the 'modified preorder tree traversal algorithm'.
there's some great info here:
http://www.sitepoint.com/article/1105
pretty cool stuff.
unfortunately, this is not where my performance difficulties lie. I'm using Macromedia FLASH as a client and had written a really nifty unserialize() routine in Actionscript to facilitate sharing of complex data with the server (my server doesn't have any XML libraries).
sadly, the routine i've written is totally chewing up cpu time. I did some profiling and this is the routine that's ruining everything. Any Actionscript heads out there feel free to explain to me how I might speed this up...it's kind of a finite-state machine. PHP heads might also have some insight as very little of this code is specific to actionscript:
//UNSERIALIZE FUNCTIONS
function extractNumberFromString(strArg, intStartIndex) {
// intStartIndex contains the index of the first digit
// this function gets all the remaining digits of the
// number starting here.
strArg = String(strArg);
var i = intStartIndex;
var intLastIndex = strArg.length-1;
if (intLastIndex == -1) {
trace("empty string sent to getNumberStartingHere");
return false;
}
if (intStartIndex > intLastIndex) {
trace("bad start index passed to getNumberStartingHere");
return false;
}
var c;
var result='';
var keepGoing = false;
do {
if (i > intLastIndex) {
break;
}
c = strArg.charAt(i);
if (!isNaN(c)) {
result += String(c);
keepGoing = true;
} else {
keepGoing = false;
}
i++;
} while(keepGoing==true)
return Number(result);
}
function unserialize(string_arg) {
open_token = string_arg.substr(0, 2);
if (open_token == "a:") {
intNumKeys = extractNumberFromString(string_arg, 2) * 2;
intLength = string_arg.length;
intStartIndex = string_arg.indexOf(":", 2)+2;
// error check here...make sure the brackets are in there
if ((string_arg.substring(intStartIndex-1, intStartIndex) != "{") ||
(string_arg.substring(intLength-1, intLength) != "}")) {
trace("BAD PATTERN...brackets don't check out");
return false;
}
// in order to recurse properly, we will need to
// pass the chars in an array because
// arrays can be passed by reference in actionscript
strTmp = string_arg.substring(intStartIndex, intLength-1);
strUnserializeCharArray = strTmp.split("");
// save memory
string_arg = null;
return unserialize_sub(intNumKeys, strUnserializeCharArray);
} else if (open_token == "s:") {
// this is a single string
intLength = extractNumberFromString(string_arg, 2);
intStringStart = string_arg.indexOf("\"", 4)+1;
return string_arg.substr(intStringStart, intLength);
} else if ((open_token == "i:") ||
(open_token == "d:")) {
// this is a single integer or double value
intValue = extractNumberFromString(string_arg, 2);
return intValue;
} else if ((string_arg.substr(0,1) == "N") ||
(string_arg.substring(0,1) == "n")) {
// this is a NULL value
// return something equivilent to "NULL" in actionscript world
trace("Unserializing NULL value " + string_arg);
//return equivalant of NULL
return null;
} else {
// invalid data
trace("Unserializing BAD DATA! " + string_arg);
return false;
} // if...
} // unserialize()
function unserialize_sub(keys, arrChars) {
var result = new Array();
var temp;
var keyname;
var skip;
var strlen;
/* this basically iterates through each character one-by-one
it's a state machine, it keeps the current state in "$mode"
when mode ==
'string'
1) look for a digit, which is the length of the
serialized string that we're about to see, save
that as $strlen.
2) after the next ':', $mode='readstring'
'readstring'
capture $strlen characters into $temp
then, mode='set'
'set'
1) set $value=$temp and assign the current hash key=$value
2) $mode='normal'
'integer'
1) put all digits into $temp, skip ":"
2) at the first ";", $mode='set'
'double'
1) same as integer, only allows "." in input
2) at the first ";", $mode='set'
'null'
1) null value, set $temp="\0"
2) $mode='set'
*/
var mode = 'normal'; // default mode
//it loops through each character in the arg string
while (arrChars.length) {
c = arrChars.shift();
//string processing
//format: s:length:"data"
if (mode == 'string') {
skip = 1; //how many chars should 'readstring' skip?
//consider all strings have " at beginning...
//if we are up to the numbers...
//find out how many chars need to be read
if (!isNaN(c)) {
//get the length of string
strlen = String(strlen) + String(c);
}
//if we already have some value for length
// and see ':', we know that
//the actual string is coming next (see format above)
if ((!isNaN(strlen)) && (c == ':')) {
mode = 'readstring';
}
} else if (mode == 'readstring') {
//read in strlen number of characters (aka. the whole string)
//skip the appropriate amount of chars
if (skip-- > 0) {
continue;
}
if (!strlen--) {
mode = 'set';
continue;
}
//grab characters until we reach $strlen
temp += c;
//integer processing
//format: i:data
} else if (mode == 'integer') {
if (c == ':') {
continue;
}
if (c == ';') {
temp = Number(temp);
mode = 'set';
continue;
}
//grab integers
if ((!isNaN(c)) || (c=="-")) {
if (c == '-') {
if (!temp) {
temp += String(c);
}
} else {
temp += String(c);
}
}
} else if (mode == 'double') {
//double processing
//format: d:data
if (c == ':') {
continue;
}
if (c == ';') {
temp = Number(temp);
mode = 'set';
continue;
}
//grab integers and decimal points
if ((!isNaN(c)) || (c==".") || (c=="-") || (c=="e")) {
if (c == '-') {
if (!temp) {
temp += c;
}
} else {
temp += c;
}
}
} else if (mode == 'null') {
//null value processing
//format: N
//temp should not be set to an empty string, becuase PHP serializes
//empty strings as 's:0:"";', not 'N;' -- only true NULL values
//should be serialized as 'N;', thus when we un-serialize, we should
//be faithful and give perl a NULL value.
temp = null;
mode = 'set';
continue;
} else if (mode == 'array') {
//array processing
//format: a:num_of_keys:{...}
//end of array definition, start processing it
if (c == '{') {
//recurse to get the sub-array
result[keyname] = new Array();
result[keyname] = unserialize_sub(temp * 2, arrChars);
//begin processing as normal again
keyname = null;
mode = 'normal';
} else if (!isNaN(c)) {
//find out how many keys are in this sub-array
temp = temp + String(c);
}
} else if (mode == 'set') {
//set the thing that's waiting for information ('key' or 'value')
//to the current data held in $temp
//(the keyname has already been specified)
//so, $temp is a value for the keyname
if (keyname != null) {
result[keyname] = temp;
//blank out keyname
keyname=null;
} else {
//$temp is a keyname
keyname = temp;
}
mode = 'normal';
}
if (mode == 'normal') {
//blank out temp vars
strlen = '';
temp = '';
if (!keys) {
return result;
}
//Upcoming information is integer
if (c == 'i') {
mode = 'integer';
keys--;
}
//Upcoming information is a double
if (c == 'd') {
mode = 'double';
keys--;
}
// Upcoming information is string
if (c == 's') {
mode = 'string';
keys--;
}
//Upcoming information is array/hash
if (c == 'a') {
mode = 'array';
keys--;
}
//Upcoming information is a null value
if ((c == 'N') || (c == 'n')) {
mode = 'null';
keys--;
}
} //if mode is normal
} //main loop
//technically, you should never hit this, becuase
//the value of $keys should be exactly the value of
//keys that need to be unserialized, and the loop
//should never exit from lack of characters
trace("SHOULD NOT HIT THIS--return normally (chars=" + chars + ")");
return result;
} //unserialize_sub()