I would like some insight into whether this function I wrote will reliably remove all "unsafe" HTML from text, leaving only the tags that are listed as "safe" in a flat file.
Thanks
Phil
if (!function_exists('safe_html')) {
/**
 * Strips HTML from a string, keeping only the tags whitelisted in a flat file
 * (one tag name per line, without the angle brackets).
 *
 * When the text contains tag-like markup and a tag file is found, processing is:
 *   1. every "<!...>" construct (comments, declarations) is removed;
 *   2. "javascript:", "vbscript:" and event-handler attribute names are replaced
 *      with the literal string "not_allowed";
 *   3. stripslashes() is applied and strip_tags() removes every tag that is not
 *      in the whitelist.
 *
 * Tag file lookup: if the global $hasSafeHTMLFile is truthy and the global
 * $safeHTMLFullFileName is an existing file, that file overrides both
 * $safeHTMLFile and $safeHTMLPath. Otherwise the file is looked up in
 * $safeHTMLPath when given, or in "$scriptPath/tcl/" when not.
 *
 * The text is returned UNCHANGED when it contains no tag-like markup and also
 * when no tag file can be found.
 * NOTE(review): returning unsanitised markup when the tag file is missing fails
 * open; callers that rely on this function for security should confirm that the
 * tag file is always deployed.
 *
 * NOTE(review): this is a blacklist-style filter, not a complete XSS defence.
 * Attributes such as "style", "data:" URIs and entity-encoded scheme names on
 * whitelisted tags are not neutralised; the replacement also applies to visible
 * text, not only to attributes.
 *
 * @access public
 * @param mixed $text Text that may contain HTML
 * @param mixed $safeHTMLFile File containing the "safe HTML" tag phrases (HTML tags not included)
 * @param mixed $safeHTMLPath (optional) Optional path where $safeHTMLFile is located if not in default location
 * @return mixed $msg $text with every non-whitelisted tag stripped, or $text itself when nothing was parsed
 * @see actual_path
 * @see file_get_contents
 * @link http://us2.php.net/manual/en/function.strip-tags.php#36574
 * @see link regarding usage of $attr string to strip potentially harmful attributes from all tags
 */
function &safe_html($text, $safeHTMLFile = '', $safeHTMLPath = '') {
    global $scriptPath, $hasSafeHTMLFile, $safeHTMLFullFileName;

    if (!preg_match('/<[^>]+>/i', $text)) return $text; // NO NEED TO PARSE IF NO HTML IS EVEN FOUND

    // A globally configured tag file overrides the arguments. dirname()/basename()
    // are used because splitting on the trailing "/filename" discards the filename.
    if ($hasSafeHTMLFile && is_file(actual_path($safeHTMLFullFileName))) {
        $safeHTMLPath = dirname($safeHTMLFullFileName);
        $safeHTMLFile = basename($safeHTMLFullFileName);
    }

    // Only the location that actually applies is checked: a file supplied through
    // $safeHTMLPath must not also be required to exist in the default directory.
    if ($safeHTMLPath) {
        $tagFile = actual_path("$safeHTMLPath/$safeHTMLFile");
    } else {
        $tagFile = actual_path("$scriptPath/tcl/$safeHTMLFile");
    }
    if (!is_file($tagFile)) return $text; // NO SAFE HTML TAG FILE

    // Read exactly the path that was just verified. Warnings stay suppressed so a
    // read failure cannot leak into the page; failure is handled explicitly below.
    $contents = @file_get_contents($tagFile);

    // Build the whitelist in the "<tag>" form strip_tags() expects. Blank lines and
    // CRLF line endings are skipped so no bogus "<>" entry is produced. An unreadable
    // file yields an empty whitelist, i.e. ALL HTML is stripped.
    $allowed = array();
    if ($contents !== false) {
        foreach (preg_split('/[\r\n]+/', $contents, -1, PREG_SPLIT_NO_EMPTY) as $line) {
            $line = trim($line);
            if ($line !== '') $allowed[] = "<$line>";
        }
    }
    $tags = join(' ', $allowed);

    // Original explicit blacklist, matched anywhere in the text.
    $attrs = 'javascript:|onclick|ondblclick|onmousedown|onmouseup|onmouseover|onmousemove|onmouseout|onkeypress|onkeydown|onkeyup';
    // Additionally catch "vbscript:" and ANY on<event>= handler (onload, onerror,
    // onfocus, ...). The lookbehind requires an attribute separator before "on" so
    // that URL parameters such as "?online=1" are left alone.
    $attrs .= '|vbscript:|(?<=[\s"\'\/])on[a-z]+(?=\s*=)';

    $text = preg_replace('/<![^>]+>/', '', $text); // WILL STRIP OUT HARMFUL "COMMENT TAGS"
    $text = stripslashes(preg_replace("/$attrs/i", 'not_allowed', $text)); // WILL STRIP OUT POTENTIALLY HARMFUL ATTRIBUTES FROM TAGS

    // The function returns by reference, so the result must live in a variable.
    $clean = strip_tags($text, $tags); // IF $tags IS EMPTY THEN ALL HTML WILL BE STRIPPED INADVERTENTLY
    return $clean;
}
}