/ Published in: PHP
The unicode decoding functions were adapted from the Decoder.php source from the Zend Framework.
Expand |
Embed | Plain Text
<?php //http://framework.zend.com/svn/framework/standard/trunk/library/Zend/Json/Decoder.php function utf162utf8($utf16) { // Check for mb extension otherwise do by hand. } switch (true) { case ((0x7F & $bytes) == $bytes): // this case should never be reached, because we are in ASCII range // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 case (0x07FF & $bytes) == $bytes: // return a 2-byte UTF-8 character // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 case (0xFFFF & $bytes) == $bytes: // return a 3-byte UTF-8 character // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 } // ignoring UTF-32 for now, sorry return ''; } function decodeUnicodeString($chrs) { $utf8 = ''; for($i = 0; $i < $strlen_chrs; $i++) { switch (true) { // single, escaped unicode character $utf8 .= utf162utf8($utf16); $i += 5; break; case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F): $utf8 .= $chrs{$i}; break; case ($ord_chrs_c & 0xE0) == 0xC0: // characters U-00000080 - U-000007FF, mask 110XXXXX //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 ++$i; break; case ($ord_chrs_c & 0xF0) == 0xE0: // characters U-00000800 - U-0000FFFF, mask 1110XXXX // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $i += 2; break; case ($ord_chrs_c & 0xF8) == 0xF0: // characters U-00010000 - U-001FFFFF, mask 11110XXX // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $i += 3; break; case ($ord_chrs_c & 0xFC) == 0xF8: // characters U-00200000 - U-03FFFFFF, mask 111110XX // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $i += 4; break; case ($ord_chrs_c & 0xFE) == 0xFC: // characters U-04000000 - U-7FFFFFFF, mask 1111110X // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $i += 5; break; } } return $utf8; } $json = json_encode(simplexml_load_file("the_file.xml")); ?>
You need to login to post a comment.
