As a bonus, here's my other function, which replaces stretches of text that contain no matches with an ellipsis, as in Google search results.
/**
 * Shortens text that contains highlighted query words by replacing stretches
 * without matches with an ellipsis ("..."), similar to search-result snippets.
 *
 * Query words are expected to be wrapped in $beginstr / $endstr already.
 * Words are never broken up; text is only removed at spaces.
 *
 * Behaviour by number of $beginstr occurrences in the (trimmed) text:
 *  - text not longer than $max, or no occurrence: the trimmed text is returned unchanged;
 *  - exactly one occurrence: words are removed from the far ends of the text
 *    before and after the match until the text fits in $max plus the marker
 *    length, always keeping at least $wordspacing words on each side;
 *  - several occurrences: only a few words before the first and after the
 *    last match are kept, and long gaps between matches are collapsed to
 *    "<first words> ... <last words>".
 *
 * Limitations: lengths are measured in bytes (strlen), and with many matches
 * the result may still be longer than $max.
 *
 * @param string $text        Text with the query words already wrapped in markers.
 * @param int    $max         Target maximum length, not counting the markers.
 * @param string $beginstr    Marker that opens a query word.
 * @param string $endstr      Marker that closes a query word.
 * @param int    $wordspacing How many words to keep before and after a query word.
 * @return string The shortened text.
 */
function CutDownLength($text, $max=200, $beginstr='<b>', $endstr='</b>', $wordspacing=5) {
    $b_strlen = strlen($beginstr);
    $e_strlen = strlen($endstr);
    $bold_strlen = $b_strlen + $e_strlen;

    $text = trim($text);

    // Nothing to do when the text already fits.
    if (strlen($text) <= $max) {
        return $text;
    }

    $times = substr_count($text, $beginstr);

    // Without any highlighted word there is no anchor to cut around.
    if ($times === 0) {
        return $text;
    }

    if ($times === 1) {
        // ---- exactly one query word ----
        $thismax = $max + $bold_strlen;
        $before_word_removed = false;
        $after_word_removed = false;

        // At most 22 passes, as a guard against endless looping.
        for ($pass = 0; $pass < 22 && strlen($text) > $thismax; $pass++) {
            $begin_pos = strpos($text, $beginstr);
            if ($begin_pos === false) {
                break;
            }
            // Look for the closing marker that belongs to this match.
            $end_pos = strpos($text, $endstr, $begin_pos);
            if ($end_pos === false) {
                // Unbalanced markers: leave the text as it is.
                break;
            }

            // The query word including both markers, and the text around it.
            $query_end = $end_pos + $e_strlen;
            $query_word = substr($text, $begin_pos, $query_end - $begin_pos);
            $before_text = trim((string) substr($text, 0, $begin_pos));
            $after_text = trim((string) substr($text, $query_end));

            $outer_len = strlen($text) - strlen($query_word);
            if ($outer_len === 0) {
                // The text consists of the query word only.
                break;
            }

            // Split the amount to remove between both sides, proportionally
            // to how much text each side holds.
            $before_len = strlen($before_text);
            $after_len = strlen($after_text);
            $total_len_to_chop = strlen($text) - $thismax;
            $len_to_chop_before = round($before_len / $outer_len * $total_len_to_chop) + 1;
            $len_to_chop_after = round($after_len / $outer_len * $total_len_to_chop) + 1;

            // Becomes true when at least one side still has spare words.
            $can_trim = false;

            // Remove words from the beginning of the text before the match.
            $before_query_word = '';
            if ($before_len > 0) {
                $words = explode(" ", $before_text);
                $num_words = count($words);
                if ($num_words > $wordspacing) {
                    $can_trim = true;
                    if ($pass === 0) {
                        // First pass: guess the number of words to drop from
                        // the average word length.
                        $ave_word_len = $before_len / $num_words;
                        $numWordChop = (int) round($len_to_chop_before / $ave_word_len);
                        if (($num_words - $numWordChop) < $wordspacing) {
                            $numWordChop = $num_words - $wordspacing;
                        }
                        $words = array_slice($words, $numWordChop);
                        if ($numWordChop > 0) {
                            $before_word_removed = true;
                        }
                    } else {
                        // Later passes: drop one more word, farthest from the match.
                        array_shift($words);
                        $before_word_removed = true;
                    }
                }
                $before_query_word = implode(" ", $words);
            }

            // Remove words from the end of the text after the match.
            $after_query_word = '';
            if ($after_len > 0) {
                $words = explode(" ", $after_text);
                $num_words = count($words);
                if ($num_words > $wordspacing) {
                    $can_trim = true;
                    if ($pass === 0) {
                        $ave_word_len = $after_len / $num_words;
                        $numWordChop = (int) round($len_to_chop_after / $ave_word_len);
                        if (($num_words - $numWordChop) < $wordspacing) {
                            $numWordChop = $num_words - $wordspacing;
                        }
                        $words = array_slice($words, 0, $num_words - $numWordChop);
                        if ($numWordChop > 0) {
                            $after_word_removed = true;
                        }
                    } else {
                        // Drop the last word; the words next to the match stay
                        // in their original order.
                        array_pop($words);
                        $after_word_removed = true;
                    }
                }
                $after_query_word = implode(" ", $words);
            }

            // Put the pieces back together.
            $text = '';
            if ($before_query_word !== '') {
                $text .= $before_query_word;
                // if the last character is alphanumeric then put a space
                if (preg_match("/([A-Za-z0-9.,;)])$/", $before_query_word)) {
                    $text .= " ";
                }
            }
            $text .= $query_word;
            if ($after_query_word !== '') {
                // if the first character is alphanumeric then put a space
                if (preg_match("/^([A-Z-a-z0-9&(])/", $after_query_word)) {
                    $text .= " ";
                }
                $text .= $after_query_word;
            }

            // Both sides are down to $wordspacing words: further passes
            // could not shorten the text any more.
            if (!$can_trim) {
                break;
            }
        }

        if ($before_word_removed) {
            $text = "... " . $text;
        }
        if ($after_word_removed) {
            $text .= " ...";
        }
        return $text;
    }

    // ---- more than one occurrence of the query word(s) ----
    $thismax = $max + ($bold_strlen * $times);

    // Split off the text before the first match ...
    $s = " " . $text;
    $a = explode($beginstr, $s);
    $strbeforefirst = $a[0];
    $a = array_slice($a, 1);
    $s = implode($beginstr, $a) . " ";

    // ... and the text after the last match.
    $a = explode($endstr, $s);
    $strafterlast = array_pop($a);

    // What remains runs from the first opening to the last closing marker.
    $s = $beginstr . implode($endstr, $a) . $endstr;

    if (strlen(strip_tags($s)) > $thismax) {
        // Too long: collapse the long gaps between matches.
        // Each section is "<query word><endstr><text up to the next match>".
        $sections = explode($beginstr, $s);
        foreach ($sections as $key => $section) {
            $sectionLen = strlen($section) - $e_strlen;
            if ($sectionLen <= ($thismax / 3)) {
                continue;
            }
            $words = explode(" ", $section);
            $numWords = count($words);
            // Only collapse when the kept head and tail do not overlap,
            // otherwise words would be duplicated.
            if ($numWords > 2 * $wordspacing) {
                $head = array_slice($words, 0, $wordspacing);
                $tail = array_slice($words, $numWords - $wordspacing);
                $sections[$key] = implode(" ", $head) . " ... " . implode(" ", $tail);
            }
        }
        $s = implode($beginstr, $sections);
    }

    // Leading context: the last three space-separated tokens before the first match.
    $words = explode(" ", $strbeforefirst);
    $beforestrclip = "... " . trim(implode(" ", array_slice($words, -3)));
    // if the last character is alphanumeric then put a space
    if (preg_match("/([A-Za-z0-9.,;)&])$/", $beforestrclip)) {
        $beforestrclip .= " ";
    }
    // Clipping did not save anything: keep the original leading text.
    if (strlen($strbeforefirst) < strlen($beforestrclip)) {
        $beforestrclip = $strbeforefirst;
    }

    // Trailing context: the first $wordspacing tokens after the last match.
    $words = explode(" ", $strafterlast);
    $afterstrclip = trim(implode(" ", array_slice($words, 0, max(0, $wordspacing)))) . " ...";
    // if the first character is alphanumeric then put a space
    if (preg_match("/^([A-Z-a-z0-9&(])/", $afterstrclip)) {
        $afterstrclip = " " . $afterstrclip;
    }
    // Clipping did not save anything: keep the original trailing text.
    if (strlen($strafterlast) < strlen($afterstrclip)) {
        $afterstrclip = $strafterlast;
    }

    return $beforestrclip . $s . $afterstrclip;
}
The only problem with this function is that if there are tons of matches for the query words, it doesn't cut the text down to the specified size. If anyone expands on the function to accommodate that, please share it with me. Thanks.