First off, the ereg_* functions have been deprecated (since PHP 5.3) and were removed entirely in PHP 7, so you should stick exclusively to the PCRE (preg_*) functions.
First replacing \r\n with \n, then \n\n with \n and finally \n with '' is the same as replacing \r\n with '', then \n with ''.
The easiest approach would probably be to use DOM:
// Parse the markup into a DOM tree so that only text and comment nodes are
// touched, never tag names or attribute values.
$dom = new DOMDocument();
$dom->loadHTML($str);
// Start at the document node itself instead of looping over its children:
// descendAndCleanText() only removes comments that are children of the node it
// is given, so comments sitting directly under the document (for example
// before <html>) would otherwise survive.
descendAndCleanText($dom);
/**
 * Recursively cleans the subtree rooted at the given DOM node, in place.
 *
 * - Text nodes have every carriage return, line feed and tab removed.
 * - Comment nodes that are children of a visited node are removed.
 * - <script> children are skipped entirely.
 * - Inside <pre> and <textarea> the text is left untouched, because line
 *   breaks and tabs are significant there; comments are still removed.
 *
 * @param DOMNode $n                  Node whose subtree is cleaned.
 * @param bool    $preserveWhitespace When true, text nodes are not modified.
 *                                    Set automatically while descending into
 *                                    <pre>/<textarea>; callers normally omit it.
 * @return void
 */
function descendAndCleanText($n, $preserveWhitespace = false) {
    if ($n->nodeName === '#text') {
        if (!$preserveWhitespace) {
            // Removing "\r" and "\n" individually also covers "\r\n" pairs
            // as well as a lone "\r".
            $n->nodeValue = str_replace(array("\r", "\n", "\t"), '', $n->nodeValue);
        }
        return;
    }

    if (!$n->hasChildNodes()) {
        return;
    }

    // Child nodes cannot be removed while iterating over the live childNodes
    // list, so comments are collected first and removed afterwards.
    $comments = array();
    foreach ($n->childNodes as $c) {
        if ($c->nodeName === '#comment') {
            $comments[] = $c;
            continue;
        }
        // Script content is presumably exposed as CDATA rather than '#text'
        // (verify for the libxml version in use); either way there is no
        // point in descending into it.
        if ($c->nodeName === 'script') {
            continue;
        }
        $keepWhitespace = $preserveWhitespace
            || in_array($c->nodeName, array('pre', 'textarea'), true);
        descendAndCleanText($c, $keepWhitespace);
    }

    foreach ($comments as $c) {
        $c->parentNode->removeChild($c);
    }

    // Merge text nodes that became adjacent once the comments between them
    // were removed.
    $n->normalize();
}