Posted By

wizard04 on 09/05/08


Tagged

escape url javascript html encode


Versions (?)

Who likes this?

1 person has marked this snippet as a favorite

jamesming


URL and HTML Encoding


 / Published in: JavaScript
 

  1. /*****************************************
  2.  * String encoding/decoding functions
  3.  *
  4.  * This work is licensed under a Creative Commons Attribution 3.0 Unported License
  5.  * http://creativecommons.org/licenses/by/3.0/
  6.  *
  7.  * Author: Andy Harrison, http://dragonzreef.com/
  8.  * Date: 16 September 2011
  9.  *****************************************/
  10.  
  /**
   * Percent-encodes this string with encodeURIComponent(), so it can be used
   * as a single URL component (reserved characters such as "&" are escaped).
   *
   * @returns {string} the encoded string
   */
  String.prototype.encodeToUrlPart = function()
  {
    return encodeURIComponent(this);
  };
  /**
   * Reverses percent-encoding on this string with decodeURIComponent().
   *
   * @returns {string} the decoded string
   * @throws {URIError} if the string contains a malformed escape sequence
   */
  String.prototype.decodeFromUrlPart = function()
  {
    return decodeURIComponent(this);
  };
  /**
   * Percent-encodes this string with encodeURI(), which treats it as a
   * complete URL and therefore leaves URL delimiters such as ":", "/", "?",
   * "&" and "=" untouched.
   *
   * @returns {string} the encoded string
   */
  String.prototype.encodeToUrl = function()
  {
    return encodeURI(this);
  };
  /**
   * Reverses encodeURI() on this string with decodeURI(). Escape sequences
   * that stand for URL delimiters (for example "%26") are left encoded.
   *
   * @returns {string} the decoded string
   * @throws {URIError} if the string contains a malformed escape sequence
   */
  String.prototype.decodeFromUrl = function()
  {
    return decodeURI(this);
  };
  15.  
  /**
   * HTML-escapes every ampersand that does not begin a valid character reference.
   *
   * An ampersand is kept as-is only when it starts one of:
   *   - a decimal reference with at least one digit, e.g. "&#65;"
   *   - a hexadecimal reference with at least one digit, e.g. "&#x41;"
   *   - a named reference whose (case-sensitive) name is an HTML 4.01 entity,
   *     e.g. "&amp;" or "&nbsp;"
   * All three forms must be terminated by ";". Any other ampersand is
   * rewritten as "&amp;".
   *
   * See http://www.w3.org/TR/html401/sgml/entities.html for the entity list.
   *
   * @returns {string} the escaped copy of this string; "" for an empty string
   */
  String.prototype.encodeIncongruousAmphersands = function()
  {
    var str = this.toString();
    if(!str) return "";

    var validEntityNames = ""+
      //markup-significant and internationalization characters
      "quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
      "rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
      //ISO 8859-1 characters
      "nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
      "plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
      "Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
      "Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
      "Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
      "igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
      "uacute|ucirc|uuml|yacute|thorn|yuml|"+
      //symbols, mathematical symbols, and Greek letters
      "fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
      "Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
      "kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
      "upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
      "darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
      "sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
      "le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
      "loz|spades|clubs|hearts|diams";

    //anchored and case-sensitive: "&Amp;" is not a valid entity, "&amp;" is
    var validName = new RegExp("^(?:"+validEntityNames+")$");

    //Match each ampersand together with the ";"-terminated reference body that
    //follows it, if any. Because the body is optional, every "&" in the string is
    //visited exactly once (including adjacent ones such as "&&"), and nothing
    //after the reference is consumed.
    return str.replace(/&(?:(#x[0-9a-f]+|#[0-9]+|[a-z0-9]+);)?/ig, function(match, body)
    {
      //bare ampersand: not followed by anything shaped like a reference
      if(!body) return "&amp;";
      //numeric (decimal or hexadecimal) reference: always kept
      if(body.charAt(0) === "#") return match;
      //named reference: kept only when the name is a known entity
      if(validName.test(body)) return match;
      return "&amp;" + match.slice(1);
    });
  };
  /**
   * Escapes this string for safe inclusion in HTML text or attribute values.
   *
   * The characters <, >, " and ' are always replaced by "&lt;", "&gt;",
   * "&quot;" and "&#39;". Ampersands are handled according to the flag.
   *
   * @param {boolean} [keepValidEntities] when truthy, ampersand handling is
   *   delegated to encodeIncongruousAmphersands(); otherwise every ampersand
   *   is replaced by "&amp;"
   * @returns {string} the escaped copy of this string; "" for an empty string
   */
  String.prototype.encodeToHtml = function(keepValidEntities)
  {
    var str = this.toString();
    if(!str) return "";
    //ampersands must be escaped first so the entities added below are not re-escaped
    str = keepValidEntities ? str.encodeIncongruousAmphersands() : str.replace(/&/g, "&amp;");
    str = str.replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
    return str;
  };
  /**
   * Unescapes all character references in this string, not just the
   * markup-significant ones. Requires a browser DOM (uses `document`).
   *
   * A <textarea> is used as the scratch element: an HTML parser treats its
   * content as text with character references, so tags in the input are not
   * turned into elements (no images are fetched and no event handlers are
   * attached), and the whole decoded text is available through `.value`
   * regardless of what the string contains.
   *
   * @returns {string} the decoded string; "" for an empty string
   */
  String.prototype.decodeFromHtml = function()
  {
    var str = this.toString();
    if(!str) return "";
    var tmp = document.createElement("textarea");
    tmp.innerHTML = str;
    return tmp.value;
  };
  74.  
  /**
   * Escapes this string for use inside a JavaScript string literal in embedded
   * (<script>) or inline (event-handler attribute) code.
   * See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
   *
   * Escaped characters:
   *   - backslash                          -> \\
   *   - tab, line feed, carriage return    -> \t, \n, \r
   *   - ' and "                            -> \u0027, \u0022 (cannot close the literal)
   *   - < and >                            -> \u003C, \u003E (cannot close the tag)
   *   - & and =                            -> \u0026, \u003D (defense in depth)
   *   - all other control characters U+0000-U+001F and U+007F-U+009F, plus
   *     U+2028 and U+2029                  -> \uXXXX
   * Control characters are escaped so that the literal does not depend on how
   * the surrounding HTML parser treats them (NUL in particular is rewritten by
   * HTML parsers inside script content).
   *
   * The output contains no quotes; the caller supplies the surrounding ones.
   *
   * @returns {string} the escaped copy of this string; "" for an empty string
   */
  String.prototype.encodeToJavaScriptString = function()
  {
    var str = this.toString();
    if(!str) return "";

    //characters that have a short, conventional escape
    var shortEscapes = { "\\": "\\\\", "\t": "\\t", "\n": "\\n", "\r": "\\r" };

    //single pass, so backslashes introduced by one escape are never re-escaped
    return str.replace(/[\\'"<>&=\u0000-\u001F\u007F-\u009F\u2028\u2029]/g, function(ch)
    {
      if(shortEscapes.hasOwnProperty(ch)) return shortEscapes[ch];
      var hex = ch.charCodeAt(0).toString(16).toUpperCase();
      return "\\u" + ("0000" + hex).slice(-4);
    });
  };

Report this snippet  

Comments

RSS Icon Subscribe to comments
Posted By: Scooter on September 5, 2008

Would be more elegant to replace the massive switch statement with an array of the entity names.

Posted By: wizard04 on September 7, 2008

Excellent point. Made it a regex.

Posted By: wizard04 on April 16, 2014

The latest: https://gist.github.com/wizard04wsu/8831356

You need to login to post a comment.