Posted By

michaelfox on 04/09/09


Tagged

textmate utf8 magic characters encoding charset


Versions (?)

Who likes this?

1 person has marked this snippet as a favorite

vali29


decode utf8 charset


 / Published in: PHP
 

  /**
   * Convert multi-byte UTF-8 sequences in a string to numeric HTML entities.
   *
   * Two-byte sequences (lead byte 0xC0-0xDF) and three-byte sequences
   * (lead byte 0xE0-0xEF) are replaced by "&#N;", where N is the decimal
   * code point computed from the payload bits of each byte. Plain ASCII,
   * four-byte sequences and stray high-bit bytes are left untouched.
   *
   * The matching is deliberately byte-wise (no "u" modifier), because the
   * input is treated as raw bytes, not as an already-validated UTF-8 string.
   *
   * @param string $string Raw byte string, possibly containing UTF-8.
   * @return string The input with 2- and 3-byte sequences turned into entities.
   */
  function charset_decode_utf_8($string) {
      // Only do the slower conversion when there are 8-bit characters at all.
      if (!preg_match('/[\x80-\xFF]/', $string)) {
          return $string;
      }

      // Decode three-byte sequences first: 1110xxxx 10xxxxxx 10xxxxxx.
      // Callbacks replace the removed /e (eval) modifier.
      $string = preg_replace_callback(
          '/([\xE0-\xEF])([\x80-\xBF])([\x80-\xBF])/',
          function ($m) {
              $codePoint = (ord($m[1]) - 224) * 4096
                  + (ord($m[2]) - 128) * 64
                  + (ord($m[3]) - 128);
              return '&#' . $codePoint . ';';
          },
          $string
      );

      // Decode two-byte sequences: 110xxxxx 10xxxxxx.
      $string = preg_replace_callback(
          '/([\xC0-\xDF])([\x80-\xBF])/',
          function ($m) {
              $codePoint = (ord($m[1]) - 192) * 64 + (ord($m[2]) - 128);
              return '&#' . $codePoint . ';';
          },
          $string
      );

      return $string;
  }

Report this snippet  

You need to login to post a comment.