Read a UTF16 String from a "byte" source


Published in: JavaScript
Save to your folder(s)

This is useful when you have a "binary string" (a string in which each character represents a single byte, such as the one returned by getStringAt() in BinaryAjax) and need to decode it as UTF-16 text.


Copy this code and paste it in your HTML
  /**
   * Decode UTF-16 text from a "binary string", i.e. a string in which each
   * character stands for one byte.
   *
   * A leading byte-order mark (FE FF = big-endian, FF FE = little-endian)
   * overrides `bigEndian` and is not included in the result. Without a BOM the
   * data is read as big-endian when `bigEndian` is truthy and as little-endian
   * otherwise.
   *
   * A trailing byte that does not complete a 16-bit code unit is ignored
   * instead of raising a TypeError.
   *
   * @param {string} bytes - Source data, one character per byte.
   * @param {boolean} [bigEndian] - Byte order to assume when no BOM is present.
   * @returns {string} The decoded string.
   */
  function readUTF16String(bytes, bigEndian) {
    // Only the low 8 bits of a character are treated as its byte value, so a
    // character code above 0xFF cannot spill into the other half of a unit.
    const byteAt = (index) => bytes[index].charCodeAt(0) & 0xff;

    let isBigEndian = Boolean(bigEndian);
    let start = 0;

    if (bytes.length >= 2) {
      const first = byteAt(0);
      const second = byteAt(1);
      if (first === 0xfe && second === 0xff) {
        isBigEndian = true;
        start = 2;
      } else if (first === 0xff && second === 0xfe) {
        isBigEndian = false;
        start = 2;
      }
    }

    // Position of the most/least significant byte inside each 2-byte unit.
    const highOffset = isBigEndian ? 0 : 1;
    const lowOffset = 1 - highOffset;

    // JavaScript strings are sequences of UTF-16 code units, so surrogate
    // pairs need no special handling: appending both halves in order yields
    // the same string as passing them to String.fromCharCode together.
    let result = "";
    for (let ix = start; ix + 1 < bytes.length; ix += 2) {
      const unit = (byteAt(ix + highOffset) << 8) | byteAt(ix + lowOffset);
      result += String.fromCharCode(unit);
    }

    return result;
  }

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.