Yeah, I have an XML parser I used for another job; the problem is that the file is a .txt file (although I could change it to .xml).
But after the parser gets through this
<page>
</page>
the first time around, it regards the rest of the document as rubbish
The XML parse code I am using is shown below, but it gives a nasty error message:
XML error: junk after document element at line 7
XML parse code:
// Location of the document to parse.
$file = "link to file";

// Output emitted when an element opens, keyed by tag name.
// Every entry is currently an empty string.
$startTags = array_fill_keys(
    array("page_number", "name", "link", "content"),
    ""
);

// Output emitted when an element closes; same tag names, same order.
$endTags = array_fill_keys(array_keys($startTags), "");
/**
 * Start-tag handler: echoes the output registered for $name in the global
 * $startTags table, if any.
 *
 * @param mixed  $parser The XML parser invoking the callback (unused).
 * @param string $name   Name of the element being opened.
 * @param array  $attrs  Attributes of the element (unused).
 */
function startElement($parser, $name, $attrs) {
    global $startTags;
    // Tags with no entry (or an empty one) print nothing. empty() also avoids
    // the undefined-index notice a direct lookup raises for unlisted tags.
    if (!empty($startTags[$name])) {
        echo $startTags[$name];
    }
}
/**
 * End-tag handler: echoes the output registered for $name in the global
 * $endTags table, if any.
 *
 * @param mixed  $parser The XML parser invoking the callback (unused).
 * @param string $name   Name of the element being closed.
 */
function endElement($parser, $name) {
    global $endTags;
    // Tags with no entry (or an empty one) print nothing. empty() also avoids
    // the undefined-index notice a direct lookup raises for unlisted tags.
    if (!empty($endTags[$name])) {
        echo $endTags[$name];
    }
}
/**
 * Character-data handler: writes the text found between tags straight to
 * the output, unchanged.
 *
 * @param mixed  $parser The XML parser invoking the callback (unused).
 * @param string $data   Text content reported by the parser.
 */
function characterData($parser, $data) {
    print $data;
}
// Create the parser. Case folding is enabled by default and upper-cases every
// element name before the handlers see it; switch it off so names arrive as
// written in the file and can match the lower-case keys of the tag tables.
$xml_parser = xml_parser_create();
xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 0);

// Register the callbacks for element boundaries and for text between tags.
xml_set_element_handler($xml_parser, "startElement", "endElement");
xml_set_character_data_handler($xml_parser, "characterData");

// Open the XML file.
if (!($fp = fopen($file, "r")))
{
    die("Cannot locate XML data file: $file");
}

// NOTE(review): a file holding several top-level elements (e.g. repeated
// <page>...</page> blocks) is not a single XML document, and the parser
// reports "junk after document element" for it. If that is the shape of the
// input, wrap the content in one root element before parsing -- confirm
// against the real file.

// Read and parse the file in 4 KB chunks. The loop is driven by end-of-file
// rather than by the truthiness of the chunk, so the parser always receives a
// final call with $is_final = true. That holds even when the file size is an
// exact multiple of the chunk size, or a chunk is a falsy string such as "0".
// Without the final call, a truncated document would go unreported.
do
{
    $data = fread($fp, 4096);
    if ($data === false)
    {
        fclose($fp);
        xml_parser_free($xml_parser);
        die("Cannot read XML data file: $file");
    }

    $is_final = feof($fp);
    if (!xml_parse($xml_parser, $data, $is_final))
    {
        // Build the message before releasing the parser it is read from.
        $message = sprintf("XML error: %s at line %d",
            xml_error_string(xml_get_error_code($xml_parser)),
            xml_get_current_line_number($xml_parser));
        fclose($fp);
        xml_parser_free($xml_parser);
        die($message);
    }
} while (!$is_final);

// Clean up: release the file handle as well as the parser.
fclose($fp);
xml_parser_free($xml_parser);