Hi, I just finished this program at 4.00AM and I its working exactly like I want it to .. minus a little bug with the <BR> insertion and line ending ect .. but nothing major.
My quesiton is .. I see alot of array_search() and if-else in here .. and a foreach that loops through every character in the input string ... I dont think this is a very effiecient way of doing this.
I realize I could use a switch instead of all those If-Else, but is there any other thing I could do to make this a little more effiecient?
btw .. the text strings that will be passed to this will be medium text articles ect ect .. the average size will be from 8000-10000 chars but it might go up to 25000-30000 char for long articles.
I dont think a loop that goes 30,000 time is a good idea .. but I cant think of any other way.
searching the string for substring and replacing is out of the question btw.
Let me know. My current working code is bellow. I tried to comment where I remember to. LOL.
Thanks.
<?php
/* Input : post variable $_POST[] containing vni format vietnamese words.
Output: html formated vietnamese words.
-- String passed in
-- Loop through string using foreach $string[]
--- Is the character a space?
---- yes: add space to $output
---- no: Is the character a line break?
----- yes: add <BR> to $output
----- no: Is the character alphanumeric?
------ yes: add character to $output
------ no: Is the character represented in $onechar_html or $onechar_code?
------- yes: Search and place key in $key. Add $key to output
------- no: Check next char in line to see if its a special character also.
-------- yes: Add next char to $key. Increment count by 1.
Match againt $twochar_html and $twochar_code. Add to $output.
-------- no : Take previous char from $input and add to $key. Take the last character off $output.
Match againt $twochar_html and $twochar_code. Add to $output.
That was the basic logic of this program minus a few special cases.
*/
$special_char_list= array('', 'Ü', 'Ú', 'Ù', 'Û', 'û', 'ú', 'ù', 'ü', 'Ö', 'Ó', 'Ò', 'Ô', 'Õ', 'ó', 'ò', 'õ',
'ö', 'ô', 'Ä', 'Å', 'Á', 'À', 'Ã', 'ã', 'å', 'á', 'à', 'ä', 'Ï', 'ï', 'Ê', 'É', 'È',
'Ë', 'é', 'è', 'ê', 'ë', 'Ø', 'ø', 'Ñ', 'ñ', 'Æ', 'æ');
//regular alphabet soup
$regular_soup = array('',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z',
'1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
',', '.', '~', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '_', ')');
//vni char that appears as one char and is replacable using html
$onechar_html = array('', 'ì' => 'ì', 'í' => 'í', 'Ì' => 'Ì', 'Í' => 'Í');
//vni char that appears as one char and is NOT replacable using pure html
$onechar_code = array('',
'Ơ' => 'Ô', 'ơ' => 'ô', 'Ư' => 'Ö', 'ư' => 'ö', 'Đ' => 'Ñ',
'đ' => 'ñ', 'Ỉ' => 'Æ', 'Ĩ' => 'Ó', 'Ị' => 'Ò', 'ỉ' => 'æ',
'ĩ' => 'ó', 'ị' => 'ò');
//vni char that appears as two char and is replacable using html
$twochar_html = array('',
'à' => 'aø', 'á' => 'aù', 'â' => 'aâ', 'ã' => 'aõ',
'À' => 'AØ', 'Á' => 'AÙ', 'Â' => 'AÂ', 'Ã' => 'AÕ',
'ò' => 'oø', 'ó' => 'où', 'ô' => 'oâ', 'õ' => 'oõ',
'Ò' => 'OØ', 'Ó' => 'OÙ', 'Ô' => 'OÂ', 'Õ' => 'OÕ',
'è' => 'eø', 'é' => 'eù', 'ê' => 'eâ',
'È' => 'EØ', 'É' => 'EÙ', 'Ê' => 'EÂ');
//vni char that appears as two char and is NOT replacable using pure html
$twochar_code = array('',
'Ả' => 'AÛ', 'Ă' => 'AÊ', 'Ạ' => 'AÏ', 'Ấ' => 'AÁ', 'Ầ' => 'AÀ',
'Ẩ' => 'AÅ', 'Ẫ' => 'AÃ', 'Ậ' => 'AÄ', 'Ắ' => 'AÉ', 'Ằ' => 'AÈ',
'Ẳ' => 'AÚ', 'Ẵ' => 'AÜ', '͞' => 'AË', 'ă' => 'aê', 'ả' => 'aû',
'ạ' => 'aï', 'ấ' => 'aá', 'ầ' => 'aà', 'ẩ' => 'aå', 'ẫ' => 'aã',
'ậ' => 'aä', 'ắ' => 'aé', 'ằ' => 'aè', 'ẳ' => 'aú', 'ẵ' => 'aü',
'ặ' => 'aë',
'Ỏ' => 'OÛ', 'Ọ' => 'OÏ', 'Ố' => 'OÁ', 'Ồ' => 'OÀ', 'Ổ' => 'OÅ',
'Ỗ' => 'OÃ', 'Ộ' => 'OÄ', 'Ớ' => 'ÔÙ', 'Ờ' => 'ÔØ', 'Ở' => 'ÔÛ',
'Ỡ' => 'ÔÕ', 'Ợ' => 'ÔÏ', 'ỏ' => 'oû', 'ọ' => 'oï', 'ố' => 'oá',
'ồ' => 'oà', 'ổ' => 'oå', 'ỗ' => 'oã', 'ộ' => 'oä', 'ớ' => 'ôù',
'ờ' => 'ôø', 'ở' => 'ôû', 'ỡ' => 'ôõ', 'ợ' => 'ôï',
'Ẻ' => 'EÛ', 'Ẽ' => 'EÕ', 'Ẹ' => 'EÏ', 'Ề' => 'EÀ', 'Ế' => 'EÁ',
'Ể' => 'EÅ', 'Ễ' => 'EÃ', 'Ệ' => 'EÄ', 'ẻ' => 'eû', 'ẽ' => 'eõ',
'ẹ' => 'eï', 'ế' => 'eá', 'ề' => 'eà', 'ể' => 'eå', 'ễ' => 'eã',
'ệ' => 'eä', 'Ủ' => 'UÛ', 'Ụ' => 'UÏ', 'Ứ' => 'ÖÙ', 'Ừ' => 'ÖØ',
'Ử' => 'ÖÛ', 'Ữ' => 'ÖÕ', 'Ự' => 'ÖÏ', 'ủ' => 'uû', 'ũ' => 'uõ',
'Ũ' => 'UÕ', 'ụ' => 'Uï', 'ứ' => 'öù', 'ừ' => 'öø', 'ử' => 'öû',
'ữ' => 'öõ', 'ự' => 'öï', 'Ỳ' => 'YØ', 'Ỷ' => 'YÛ', 'Ỹ' => 'YÕ',
'ỳ' => 'yø', 'ỷ' => 'yû', 'ỹ' => 'yõ', 'ụ' => 'uï',
'ù' => 'uø', 'Ý' => 'YÙ', 'ý' => 'yù', 'ú' => 'uù');
$input = trim($_POST[input]);
$length = strlen($input);
$output ='';
for($count = 0; $count < $length; $count++){
//is the character a space?
if($input[$count] === ' '){
$output = $output." ";
} elseif ($input[$count] === "\n"){
$output = $output."<br>";
} else {
if(array_search($input[$count], $regular_soup)){
//character is regular alpha numeric character
$output = $output.$input[$count];
} elseif ($key = array_search($input[$count], $onechar_code)){
//character is in $onechar_code
if(($input[$count] == 'ö') && ($input[$count+1] == 'ô')){
$output = $output.$key;
} elseif(($input[$count] == 'ô') && ($input[$count+1] == ' ')){
$output = $output.$key;
} elseif(($input[$count] == 'ö') && ($input[$count+1] == ' ')){
$output = $output.$key;
} elseif(($input[$count] == 'ô') && (array_search($input[$count+1], $special_char_list))){
$output = $output.' ';
} elseif(($input[$count] == 'ö') && (array_search($input[$count+1], $special_char_list))){
$output = $output.' ';
} elseif (($input[$count] == 'ö') && (array_search($input[$count+2], $regular_soup))){
//for uu oo uo cases look 2 char down on input to see if that char special also
//if not special char, then that means this char is the beginning of a 2 char VNI word.
} else {
$output = $output.$key;
}
} elseif ($key = array_search($input[$count], $onechar_html)){
//character is in $onechar_html
$output = $output.$key;
} else {
if(!(array_search($input[$count+1], $special_char_list))){
//next character not in special char list, then take previous char
$needle = $input[$count-1].$input[$count];
$output = substr("$output", 0, -1);
if($key = array_search($needle, $twochar_code)){
//two char needle is found in $twochar_code
$output = $output.$key;
} elseif ($key = array_search($needle, $twochar_html)){
//two char needle is found in $twochar_html
$output = $output.$key;
}
} else{
//next character is in special charlist so take next char and this char
$needle = $input[$count].$input[$count+1];
$count++;
if($key = array_search($needle, $twochar_code)){
//two char needle is found in $twochar_code
$output = $output.$key;
} elseif ($key = array_search($needle, $twochar_html)){
//two char needle is found in $twochar_html
$output = $output.$key;
}
}
}
}
}
echo $output;
?>