I having problems parsing my xml document created in Interwoven Teamsite. It will not create the correct html format and leaves || breaks and does not read <UL> or <LI>
any solutions code listed below
<?php
include("shared/header_parser.php3");
//read directory
$default_dir = "./xml_articles";
if(!($dp = opendir($default_dir))) die("Cannot open $default_dir.");
global $item_counter;
global $item_data;
global $xml_current_tag_state;
while($file = readdir($dp)) $filenames[] = $file;
closedir($dp);
sort($filenames);
for($i=0; $i < count($filenames); $i++)
{
if($filenames[$i] != '.' && $filenames[$i] != '..')
{
echo $filenames[$i] . "<br>";
runParser($filenames[$i]);
}
}
// start runParser
function runParser($currentfile) {
global $item_counter;
global $item_data;
global $xml_current_tag_state;
$item_counter = 0;
$item_data = array();
$xml_current_tag_state = '';
if( ! ($fp = fopen($currentfile , "r" )) )
die("Couldn't open xml file!");
$type = 'UTF-8';
// create our parser
$xml_parser = xml_parser_create($type);
if( !($xml_parser = xml_parser_create()) )
die("Couldn't create XML parser!");
xml_set_element_handler($xml_parser, "startElementHandler", "endElementHandler");
xml_set_character_data_handler($xml_parser, "characterDataHandler");
while( $data = fread($fp, 4096) )
{
$data = str_replace('&', '||', $data);
//$data = str_replace('\n\r', '&#A', $data);
//echo $data;
if( !xml_parse($xml_parser, $data, feof($fp)) )
{
break; // get out of while loop if we're done with the file
}
}
xml_parser_free($xml_parser);
//echo $item_counter;
//inserting data
for( $i=0 ; $i < $item_counter ; ++$i )
{
if ($item_data[$i]["item"] == "Abstract") {
$article_abstract = $item_data[$i]["value"];
$article_abstract = str_replace('||', '&', $article_abstract);
//$article_abstract = str_replace('*', '\n', $article_abstract);
}
elseif ($item_data[$i]["item"] == "Title") {
$article_title = $item_data[$i]["value"];
$article_title = str_replace('||', '&', $article_title);
//$article_title = str_replace('##', '\n', $article_title);
}
elseif ($item_data[$i]["item"] == "Subtitle") {
$article_subtitle = $item_data[$i]["value"];
$article_subtitle = str_replace('||', '&', $article_subtitle);
}
elseif ($item_data[$i]["item"] == "Byline") {
$article_byline = $item_data[$i]["value"];
$article_byline = str_replace('||', '&', $article_byline);
}
elseif ($item_data[$i]["item"] == "First Paragraph") {
$article_intro = $item_data[$i]["value"];
$article_intro = str_replace('||', '&', $article_intro);
}
elseif ($item_data[$i]["item"] == "Body") {
$article_body = $item_data[$i]["value"];
$article_body = str_replace('||', '&', $article_body);
}
elseif ($item_data[$i]["item"] == "Footnote") {
$article_footnote = $item_data[$i]["value"];
$article_footnote = str_replace('||', '&', $article_footnote);
}
//echo "Name: " . $item_data[$i]["item"] . "<BR>\n";
//echo "Value: " . $item_data[$i]["value"] . "<BR>\n";
//echo "<BR>\n";
}
$sql = "INSERT INTO article (article_abstract,article_title,article_subtitle,article_byline,article_intro,article_body,article_footnote,article_source,article_callout,article_description,article_image,article_image_alt,article_link_text,article_link_url,article_display_flag,channel_id,article_bisac,article_start_date,article_expiration_date,article_search_word) VALUES ('$article_abstract','$article_title','$article_subtitle','$article_byline','$article_intro','$article_body','$article_footnote','$article_source','$article_callout','$article_description','$article_image','$article_image_alt','$article_link_text','$article_link_url','$article_display_flag','$channel_id','$article_bisac','$article_start_date','$article_expiration_date','$article_search_word')";
echo $sql;
$result = mysql_query($sql);
echo "<h1>Record inserted</h1>";
}
// end runParser
function startElementHandler( $parser, $element_name, $element_attribs )
{
global $item_counter;
global $item_data;
global $xml_current_tag_state;
if( $element_name == "ITEM" )
{
$item_data[$item_counter]["item"] = $element_attribs["NAME"];
}
else{
$xml_current_tag_state = $element_name;
}
}
function endElementHandler( $parser, $element_name )
{
global $item_counter;
global $item_data;
global $xml_current_tag_state;
$xml_current_tag_state = '';
if( $element_name == "ITEM" )
{
$element_name = $element_name;
$item_counter++;
}
}
function characterDataHandler( $parser, $data )
{
global $item_counter;
global $item_data;
global $xml_current_tag_state;
if( $xml_current_tag_state == '' )
return;
if( $xml_current_tag_state == "VALUE" )
{
$item_data[$item_counter]["value"] = $data;
//echo $data;
}
}
?>
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<HEAD>
<TITLE>Parsing the Article and Inserting into Table</TITLE>
</HEAD>
<BODY>
</BODY>