I use this script to strip all scripts and formatting from a document:
<?php
if (!function_exists('strip_html_to_text')) {
    /**
     * Reduces an HTML document to plain text.
     *
     * Steps, in order:
     *  1. remove <script>...</script> blocks together with their content;
     *  2. remove every remaining tag (anything between "<" and ">");
     *  3. collapse a line break followed by whitespace into that line break;
     *  4. decode a small set of named entities and decimal numeric entities.
     *
     * Decoded characters are emitted as single bytes via chr(), exactly as
     * the entity table always did (e.g. &copy; becomes byte 169).
     * NOTE(review): this presumably targets ISO-8859-1 output; confirm
     * before feeding the result to UTF-8 consumers.
     *
     * @param string $doc HTML source to clean.
     *
     * @return string The plain-text rendering of $doc.
     *
     * @throws RuntimeException When PCRE fails (e.g. backtrack limit hit).
     */
    function strip_html_to_text(string $doc): string
    {
        $stripped = preg_replace(
            [
                "'<script[^<]*?>.*?</script>'si", // javascript
                "'<[\/\!]*?[^<>]*?>'si",          // HTML elements
                "'([\r\n])[\s]+'",                // whitespace after a line break
            ],
            ['', '', '\\1'],
            $doc
        );
        if ($stripped === null) {
            throw new RuntimeException('PCRE error while stripping markup: code ' . preg_last_error());
        }

        $named = [
            'quot'  => '"',
            'amp'   => '&',
            'lt'    => '<',
            'gt'    => '>',
            'nbsp'  => ' ',
            'iexcl' => chr(161),
            'cent'  => chr(162),
            'pound' => chr(163),
            'copy'  => chr(169),
        ];

        // Decode every entity in ONE pass. Decoding "&amp;" in an earlier,
        // separate pass would turn "&amp;lt;" into "&lt;" and then into "<"
        // (double unescaping). A callback also replaces the "/e" modifier,
        // which was removed in PHP 7.0.
        $decoded = preg_replace_callback(
            '~&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);~i',
            static function (array $match) use ($named): string {
                $entity = strtolower($match[1]);
                if (isset($named[$entity])) {
                    return $named[$entity];
                }

                // Only "#<digits>" can reach this point.
                $code = (int) substr($entity, 1);
                if ($code === 160) {
                    return ' '; // non-breaking space is rendered as a plain space
                }
                if ($code > 255) {
                    // Not representable as a single byte; chr() would wrap
                    // modulo 256 and emit an unrelated character.
                    return $match[0];
                }

                return chr($code);
            },
            $stripped
        );
        if ($decoded === null) {
            throw new RuntimeException('PCRE error while decoding entities: code ' . preg_last_error());
        }

        return $decoded;
    }
}

// Script entry point: reads $doc (provided by the including code) and
// leaves the cleaned text in $text. An unset $doc is treated as empty.
$text = strip_html_to_text($doc ?? '');
?>