Revision: 8179
Updated Code
at September 17, 2011 06:06 by wizard04
Updated Code
/*****************************************
 * String encoding/decoding functions
 *
 * This work is licensed under a Creative Commons Attribution 3.0 Unported License
 * http://creativecommons.org/licenses/by/3.0/
 *
 * Author: Andy Harrison, http://dragonzreef.com/
 * Date: 16 September 2011
 *****************************************/

/** Percent-encodes the string for use as a single URL component. */
String.prototype.encodeToUrlPart = function(){ return encodeURIComponent(this); };

/** Reverses encodeToUrlPart(). Throws URIError on malformed escape sequences. */
String.prototype.decodeFromUrlPart = function(){ return decodeURIComponent(this); };

/** Percent-encodes the string as a complete URL (reserved characters are kept). */
String.prototype.encodeToUrl = function(){ return encodeURI(this); };

/** Reverses encodeToUrl(). Throws URIError on malformed escape sequences. */
String.prototype.decodeFromUrl = function(){ return decodeURI(this); };

/**
 * HTML-escapes every ampersand that does not begin a well-formed character
 * reference. Kept as-is are decimal references (&#65;), hexadecimal
 * references (&#x41;) and the named entities defined by HTML 4.01 (names are
 * case-sensitive); any other "&" becomes "&amp;".
 * See http://www.w3.org/TR/html401/sgml/entities.html
 *
 * @returns {string} the string with stray ampersands escaped ("" if empty)
 */
String.prototype.encodeIncongruousAmphersands = function()
{
  var str = this.toString();
  if(!str) return "";

  var validEntityNames = new RegExp("^(?:"+
    //markup-significant and internationalization characters
    "quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
    "rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
    //ISO 8859-1 characters
    "nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
    "plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
    "Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
    "Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
    "Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
    "igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
    "uacute|ucirc|uuml|yacute|thorn|yuml|"+
    //symbols, mathematical symbols, and Greek letters
    "fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
    "Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
    "kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
    "upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
    "darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
    "sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
    "le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
    "loz|spades|clubs|hearts|diams)$");

  //Visit every ampersand exactly once; the optional group captures whatever
  //follows it when that text is shaped like a character reference.
  return str.replace(/&(#x[0-9a-f]+;|#[0-9]+;|[a-z0-9]+;)?/ig, function(match, reference){
    if(reference)
    {
      if(reference.charAt(0) == "#") return match;  //numeric reference
      if(validEntityNames.test(reference.slice(0, -1))) return match;  //known entity name
    }
    return "&amp;"+match.slice(1);
  });
};

/**
 * Escapes the string for use in HTML body text or a quoted attribute value.
 *
 * @param {boolean} [keepValidEntities] when truthy, existing well-formed
 *        character references are preserved instead of being double-escaped
 * @returns {string} the escaped string ("" if empty)
 */
String.prototype.encodeToHtml = function(keepValidEntities)
{
  var str = this.toString();
  if(!str) return "";

  //ampersands must be handled first so the entities added below are not re-escaped
  str = keepValidEntities ? str.encodeIncongruousAmphersands() : str.replace(/&/g, "&amp;");
  str = str.replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
  return str;
};

/**
 * Unescapes all entities in the string, not just the markup-significant
 * characters, by letting the browser parse it. Requires a DOM (`document`).
 *
 * NOTE(review): the string is assigned to innerHTML, so any markup in it is
 * parsed by the browser; only use this on trusted input. Only the value of
 * the first resulting node is returned.
 *
 * @returns {string} the decoded text ("" if empty or nothing was parsed)
 */
String.prototype.decodeFromHtml = function()
{
  var str = this.toString();
  if(!str) return "";

  var tmp = document.createElement("div");
  tmp.innerHTML = str;
  //firstChild is null when the markup yields no nodes (e.g. a lone end tag)
  return tmp.firstChild ? tmp.firstChild.nodeValue : "";
};

/**
 * Escapes the string for use as a JavaScript string literal in embedded or
 * inline code.
 * See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
 *
 * @returns {string} the escaped string ("" if empty)
 */
String.prototype.encodeToJavaScriptString = function()
{
  var str = this.toString();
  if(!str) return "";

  //backslashes first, so the escapes added below are not doubled
  str = str.replace(/\\/g, "\\\\");

  //prevent "escape from the quote" attacks by escaping quotes and line feed characters
  str = str.replace(/'/g, "\\u0027").replace(/"/g, "\\u0022");
  //tab, line feed, carriage return, next line
  str = str.replace(/\u0009/ig, "\\t").replace(/\u000A/ig, "\\n").replace(/\u000D/ig, "\\r").replace(/\u0085/ig, "\\u0085");
  //line separator, paragraph separator
  str = str.replace(/\u2028/ig, "\\u2028").replace(/\u2029/ig, "\\u2029");

  //prevent string from closing the tag
  str = str.replace(/</g, "\\u003C").replace(/>/g, "\\u003E");

  //these are escaped just in case ("defense-in-depth")
  str = str.replace(/&/g, "\\u0026").replace(/=/g, "\\u003D");

  return str;
};
Revision: 8178
Updated Code
at September 12, 2008 16:14 by wizard04
Updated Code
/**
 * Escapes a string for use in HTML body text or a quoted attribute value.
 * @param {string} str text to escape
 * @returns {string} the escaped text
 */
function escapeToHTML(str)
{
  //ampersands first, so the entities added below are not re-escaped
  str = str.replace(/&/g, "&amp;");
  str = str.replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
  return str;
}

/**
 * Reverses escapeToHTML().
 * @param {string} str text produced by escapeToHTML()
 * @returns {string} the unescaped text
 */
function unescapeFromHTML(str)
{
  str = str.replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, "\"").replace(/&#39;/g, "'");
  //ampersands last, so "&amp;lt;" becomes "&lt;" rather than "<"
  str = str.replace(/&amp;/g, "&");
  return str;
}

/**
 * Escapes the string for use within a script tag or inline event handler
 * (e.g., an onclick attribute).
 * See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
 * @param {string} str text to place inside a JavaScript string literal
 * @returns {string} the escaped text
 */
function escapeToScript(str)
{
  //backslashes first, so the escapes added below are not doubled
  str = str.replace(/\\/g, "\\\\");

  //prevent "escape from the quote" attacks by escaping quotes and line feed characters
  str = str.replace(/\u0009/ig, "\\t").replace(/\u000A/ig, "\\n").replace(/\u000D/ig, "\\r").replace(/\u0085/ig, "\\u0085");
  str = str.replace(/\u2028/ig, "\\u2028").replace(/\u2029/ig, "\\u2029");
  str = str.replace(/'/g, "\\u0027").replace(/"/g, "\\u0022");

  //prevent string from closing the tag
  str = str.replace(/</g, "\\u003C").replace(/>/g, "\\u003E");

  //these are escaped just in case ("defense-in-depth")
  str = str.replace(/&/g, "\\u0026").replace(/=/g, "\\u003D");

  return str;
}

/**
 * Fixes un-escaped ampersands: every "&" that does not begin a well-formed
 * character reference becomes "&amp;". Decimal (&#65;) and hexadecimal
 * (&#x41;) references and the HTML 4.01 entity names (case-sensitive) are
 * left untouched.
 * See http://www.w3.org/TR/html401/sgml/entities.html
 * @param {string} txt text to fix
 * @returns {string} the fixed text ("" for empty/missing input)
 */
function fixAmphersands(txt)
{
  if(!txt) return "";

  var validEntityNames = new RegExp("^(?:"+
    //markup-significant and internationalization characters
    "quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
    "rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
    //ISO 8859-1 characters
    "nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
    "plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
    "Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
    "Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
    "Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
    "igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
    "uacute|ucirc|uuml|yacute|thorn|yuml|"+
    //symbols, mathematical symbols, and Greek letters
    "fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
    "Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
    "kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
    "upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
    "darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
    "sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
    "le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
    "loz|spades|clubs|hearts|diams)$");

  //Visit every ampersand exactly once; the optional group captures whatever
  //follows it when that text is shaped like a character reference.
  return txt.replace(/&(#x[0-9a-f]+;|#[0-9]+;|[a-z0-9]+;)?/ig, function(match, reference){
    if(reference)
    {
      if(reference.charAt(0) == "#") return match;  //numeric reference
      if(validEntityNames.test(reference.slice(0, -1))) return match;  //known entity name
    }
    return "&amp;"+match.slice(1);
  });
}

/**
 * Sometimes, setting an element attribute (e.g., title) with javascript
 * automatically escapes all ampersands in the string, apparently to be
 * "helpful". It also does this when creating a text node with
 * document.createTextNode. This function gets around this by setting
 * innerHTML on a temporary element and returning the text node's value.
 * Requires a DOM (`document`).
 *
 * NOTE(review): str is parsed as markup by the browser; only use this on
 * trusted input.
 * @param {string} str HTML-escaped text
 * @returns {string} value of the first parsed node, or "" if there is none
 */
function literalText(str)
{
  var tmp = document.createElement("div");
  tmp.innerHTML = str;
  return (tmp.firstChild ? tmp.firstChild.nodeValue : "");
}
Revision: 8177
Updated Code
at September 12, 2008 16:11 by wizard04
Updated Code
/**
 * Escapes a string for use in HTML body text or a quoted attribute value.
 * @param {string} str text to escape
 * @returns {string} the escaped text
 */
function escapeToHTML(str)
{
  //ampersands first, so the entities added below are not re-escaped
  str = str.replace(/&/g, "&amp;");
  str = str.replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
  return str;
}

/**
 * Reverses escapeToHTML().
 * @param {string} str text produced by escapeToHTML()
 * @returns {string} the unescaped text
 */
function unescapeFromHTML(str)
{
  str = str.replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, "\"").replace(/&#39;/g, "'");
  //ampersands last, so "&amp;lt;" becomes "&lt;" rather than "<"
  str = str.replace(/&amp;/g, "&");
  return str;
}

/**
 * Escapes the string for use within a script tag or inline event handler
 * (e.g., an onclick attribute).
 * See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
 * @param {string} str text to place inside a JavaScript string literal
 * @returns {string} the escaped text
 */
function escapeToScript(str)
{
  //backslashes first, so the escapes added below are not doubled
  str = str.replace(/\\/g, "\\\\");

  //prevent "escape from the quote" attacks by escaping quotes and line feed characters
  str = str.replace(/\u0009/ig, "\\t").replace(/\u000A/ig, "\\n").replace(/\u000D/ig, "\\r").replace(/\u0085/ig, "\\u0085");
  str = str.replace(/\u2028/ig, "\\u2028").replace(/\u2029/ig, "\\u2029");
  str = str.replace(/'/g, "\\u0027").replace(/"/g, "\\u0022");

  //prevent string from closing the tag
  str = str.replace(/</g, "\\u003C").replace(/>/g, "\\u003E");

  //these are escaped just in case ("defense-in-depth")
  str = str.replace(/&/g, "\\u0026").replace(/=/g, "\\u003D");

  return str;
}

/**
 * Fixes un-escaped ampersands: every "&" that does not begin a well-formed
 * character reference becomes "&amp;". Decimal (&#65;) and hexadecimal
 * (&#x41;) references and the HTML 4.01 entity names (case-sensitive) are
 * left untouched.
 * See http://www.w3.org/TR/html401/sgml/entities.html
 * @param {string} txt text to fix
 * @returns {string} the fixed text ("" for empty/missing input)
 */
function fixAmphersands(txt)
{
  if(!txt) return "";

  var validEntityNames = new RegExp("^(?:"+
    //markup-significant and internationalization characters
    "quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
    "rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
    //ISO 8859-1 characters
    "nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
    "plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
    "Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
    "Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
    "Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
    "igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
    "uacute|ucirc|uuml|yacute|thorn|yuml|"+
    //symbols, mathematical symbols, and Greek letters
    "fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
    "Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
    "kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
    "upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
    "darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
    "sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
    "le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
    "loz|spades|clubs|hearts|diams)$");

  //Visit every ampersand exactly once; the optional group captures whatever
  //follows it when that text is shaped like a character reference.
  return txt.replace(/&(#x[0-9a-f]+;|#[0-9]+;|[a-z0-9]+;)?/ig, function(match, reference){
    if(reference)
    {
      if(reference.charAt(0) == "#") return match;  //numeric reference
      if(validEntityNames.test(reference.slice(0, -1))) return match;  //known entity name
    }
    return "&amp;"+match.slice(1);
  });
}

/**
 * Sometimes, setting an element attribute with javascript automatically
 * escapes all ampersands in the string, apparently to be "helpful". It also
 * does this when creating a text node with document.createTextNode.
 * This function gets around this by setting innerHTML on a temporary element
 * and returning the text node's value. Requires a DOM (`document`).
 *
 * NOTE(review): str is parsed as markup by the browser; only use this on
 * trusted input.
 * @param {string} str HTML-escaped text
 * @returns {string} value of the first parsed node, or "" if there is none
 */
function literalText(str)
{
  var tmp = document.createElement("div");
  tmp.innerHTML = str;
  return (tmp.firstChild ? tmp.firstChild.nodeValue : "");
}
Revision: 8176
Updated Code
at September 12, 2008 16:11 by wizard04
Updated Code
/**
 * Escapes a string for use in HTML body text or a quoted attribute value.
 * See http://code.google.com/p/doctype/wiki/ArticleXSSInAttributes
 * and http://code.google.com/p/doctype/wiki/ArticleXSSInBodyText
 * @param {string} str text to escape
 * @returns {string} the escaped text
 */
function escapeToHTML(str)
{
  //ampersands first, so the entities added below are not re-escaped
  str = str.replace(/&/g, "&amp;");
  str = str.replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
  return str;
}

/**
 * Reverses escapeToHTML().
 * @param {string} str text produced by escapeToHTML()
 * @returns {string} the unescaped text
 */
function unescapeFromHTML(str)
{
  str = str.replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, "\"").replace(/&#39;/g, "'");
  //ampersands last, so "&amp;lt;" becomes "&lt;" rather than "<"
  str = str.replace(/&amp;/g, "&");
  return str;
}

/**
 * Escapes the string for use within a script tag or inline event handler
 * (e.g., an onclick attribute).
 * See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
 * @param {string} str text to place inside a JavaScript string literal
 * @returns {string} the escaped text
 */
function escapeToScript(str)
{
  //backslashes first, so the escapes added below are not doubled
  str = str.replace(/\\/g, "\\\\");

  //prevent "escape from the quote" attacks by escaping quotes and line feed characters
  str = str.replace(/\u0009/ig, "\\t").replace(/\u000A/ig, "\\n").replace(/\u000D/ig, "\\r").replace(/\u0085/ig, "\\u0085");
  str = str.replace(/\u2028/ig, "\\u2028").replace(/\u2029/ig, "\\u2029");
  str = str.replace(/'/g, "\\u0027").replace(/"/g, "\\u0022");

  //prevent string from closing the tag
  str = str.replace(/</g, "\\u003C").replace(/>/g, "\\u003E");

  //these are escaped just in case ("defense-in-depth")
  str = str.replace(/&/g, "\\u0026").replace(/=/g, "\\u003D");

  return str;
}

/**
 * Fixes un-escaped ampersands: every "&" that does not begin a well-formed
 * character reference becomes "&amp;". Decimal (&#65;) and hexadecimal
 * (&#x41;) references and the HTML 4.01 entity names (case-sensitive) are
 * left untouched.
 * See http://www.w3.org/TR/html401/sgml/entities.html
 * @param {string} txt text to fix
 * @returns {string} the fixed text ("" for empty/missing input)
 */
function fixAmphersands(txt)
{
  if(!txt) return "";

  var validEntityNames = new RegExp("^(?:"+
    //markup-significant and internationalization characters
    "quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
    "rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
    //ISO 8859-1 characters
    "nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
    "plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
    "Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
    "Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
    "Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
    "igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
    "uacute|ucirc|uuml|yacute|thorn|yuml|"+
    //symbols, mathematical symbols, and Greek letters
    "fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
    "Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
    "kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
    "upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
    "darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
    "sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
    "le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
    "loz|spades|clubs|hearts|diams)$");

  //Visit every ampersand exactly once; the optional group captures whatever
  //follows it when that text is shaped like a character reference.
  return txt.replace(/&(#x[0-9a-f]+;|#[0-9]+;|[a-z0-9]+;)?/ig, function(match, reference){
    if(reference)
    {
      if(reference.charAt(0) == "#") return match;  //numeric reference
      if(validEntityNames.test(reference.slice(0, -1))) return match;  //known entity name
    }
    return "&amp;"+match.slice(1);
  });
}

/**
 * Sometimes, setting an element attribute with javascript automatically
 * escapes all ampersands in the string, apparently to be "helpful". It also
 * does this when creating a text node with document.createTextNode.
 * This function gets around this by setting innerHTML on a temporary element
 * and returning the text node's value. Requires a DOM (`document`).
 *
 * NOTE(review): str is parsed as markup by the browser; only use this on
 * trusted input.
 * @param {string} str HTML-escaped text
 * @returns {string} value of the first parsed node, or "" if there is none
 */
function literalText(str)
{
  var tmp = document.createElement("div");
  tmp.innerHTML = str;
  return (tmp.firstChild ? tmp.firstChild.nodeValue : "");
}
Revision: 8175
Updated Code
at September 12, 2008 16:09 by wizard04
Updated Code
/**
 * Escapes a string for use in HTML body text or a quoted attribute value.
 * See http://code.google.com/p/doctype/wiki/ArticleXSSInAttributes
 * and http://code.google.com/p/doctype/wiki/ArticleXSSInBodyText
 * @param {string} str text to escape
 * @returns {string} the escaped text
 */
function escapeToHTML(str)
{
  //ampersands first, so the entities added below are not re-escaped
  str = str.replace(/&/g, "&amp;");
  str = str.replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
  return str;
}

/**
 * Reverses escapeToHTML().
 * @param {string} str text produced by escapeToHTML()
 * @returns {string} the unescaped text
 */
function unescapeFromHTML(str)
{
  str = str.replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, "\"").replace(/&#39;/g, "'");
  //ampersands last, so "&amp;lt;" becomes "&lt;" rather than "<"
  str = str.replace(/&amp;/g, "&");
  return str;
}

/**
 * Escapes the string for use within a script tag or inline event handler
 * (e.g., an onclick attribute).
 * See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
 * @param {string} str text to place inside a JavaScript string literal
 * @returns {string} the escaped text
 */
function escapeToScript(str)
{
  //backslashes first, so the escapes added below are not doubled
  str = str.replace(/\\/g, "\\\\");

  //prevent "escape from the quote" attacks by escaping quotes and line feed characters
  str = str.replace(/\u0009/ig, "\\t").replace(/\u000A/ig, "\\n").replace(/\u000D/ig, "\\r").replace(/\u0085/ig, "\\u0085");
  str = str.replace(/\u2028/ig, "\\u2028").replace(/\u2029/ig, "\\u2029");
  str = str.replace(/'/g, "\\u0027").replace(/"/g, "\\u0022");

  //prevent string from closing the tag
  str = str.replace(/</g, "\\u003C").replace(/>/g, "\\u003E");

  //these are escaped just in case ("defense-in-depth")
  str = str.replace(/&/g, "\\u0026").replace(/=/g, "\\u003D");

  return str;
}

/**
 * Removes all tags from txt. Common block-level tags and <br> are replaced
 * with a space so the surrounding words do not run together.
 *
 * NOTE(review): the last two replacements were reconstructed as escaping
 * leftover angle brackets (the published copy had them reduced to no-ops);
 * confirm against the author's original.
 * @param {string} txt HTML markup
 * @returns {string} the text with tags removed and stray "<" / ">" escaped
 */
function stripHTML(txt)
{
  txt = txt.replace(/<\/?(br|p|div|ul|ol|li|blockquote|pre)\s*\/?>/ig, " ");  //replace these with a space
  txt = txt.replace(/<.*?>/g, "");
  txt = txt.replace(/</g, "&lt;");
  txt = txt.replace(/>/g, "&gt;");
  return txt;
}

/**
 * Fixes un-escaped ampersands: every "&" that does not begin a well-formed
 * character reference becomes "&amp;". Decimal (&#65;) and hexadecimal
 * (&#x41;) references and the HTML 4.01 entity names (case-sensitive) are
 * left untouched.
 * See http://www.w3.org/TR/html401/sgml/entities.html
 * @param {string} txt text to fix
 * @returns {string} the fixed text ("" for empty/missing input)
 */
function fixAmphersands(txt)
{
  if(!txt) return "";

  var validEntityNames = new RegExp("^(?:"+
    //markup-significant and internationalization characters
    "quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
    "rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
    //ISO 8859-1 characters
    "nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
    "plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
    "Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
    "Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
    "Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
    "igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
    "uacute|ucirc|uuml|yacute|thorn|yuml|"+
    //symbols, mathematical symbols, and Greek letters
    "fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
    "Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
    "kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
    "upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
    "darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
    "sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
    "le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
    "loz|spades|clubs|hearts|diams)$");

  //Visit every ampersand exactly once; the optional group captures whatever
  //follows it when that text is shaped like a character reference.
  return txt.replace(/&(#x[0-9a-f]+;|#[0-9]+;|[a-z0-9]+;)?/ig, function(match, reference){
    if(reference)
    {
      if(reference.charAt(0) == "#") return match;  //numeric reference
      if(validEntityNames.test(reference.slice(0, -1))) return match;  //known entity name
    }
    return "&amp;"+match.slice(1);
  });
}

/**
 * Sometimes, setting an element attribute with javascript automatically
 * escapes all ampersands in the string, apparently to be "helpful". It also
 * does this when creating a text node with document.createTextNode.
 * This function gets around this by setting innerHTML on a temporary element
 * and returning the text node's value. Requires a DOM (`document`).
 *
 * NOTE(review): str is parsed as markup by the browser; only use this on
 * trusted input.
 * @param {string} str HTML-escaped text
 * @returns {string} value of the first parsed node, or "" if there is none
 */
function literalText(str)
{
  var tmp = document.createElement("div");
  tmp.innerHTML = str;
  return (tmp.firstChild ? tmp.firstChild.nodeValue : "");
}
Revision: 8174
Updated Code
at September 8, 2008 08:33 by wizard04
Updated Code
/**
 * Escapes a string for use in HTML body text.
 * @param {string} str text to escape
 * @returns {string} the escaped text
 */
function escapeToHTML(str){
  //ampersands first, so the entities added afterwards are not re-escaped
  return str.replace(/&/g, "&amp;").replace(/>/g, "&gt;").replace(/</g, "&lt;");
}

/**
 * Reverses escapeToHTML().
 * @param {string} str text produced by escapeToHTML()
 * @returns {string} the unescaped text
 */
function unescapeFromHTML(str){
  //ampersands last, so "&amp;lt;" becomes "&lt;" rather than "<"
  return str.replace(/&gt;/g, ">").replace(/&lt;/g, "<").replace(/&amp;/g, "&");
}

/**
 * Escapes a string for use in a double-quoted HTML attribute value.
 * @param {string} str text to escape
 * @returns {string} the escaped text
 */
function escapeToHTMLAttribute(str){
  return str.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/>/g, "&gt;").replace(/</g, "&lt;");
}

/**
 * Reverses escapeToHTMLAttribute().
 * @param {string} str text produced by escapeToHTMLAttribute()
 * @returns {string} the unescaped text
 */
function unescapeFromHTMLAttribute(str){
  return str.replace(/&quot;/g, "\"").replace(/&gt;/g, ">").replace(/&lt;/g, "<").replace(/&amp;/g, "&");
}

/**
 * Removes all tags from txt. Common block-level tags and <br> are replaced
 * with a space so the surrounding words do not run together.
 *
 * NOTE(review): the last two replacements were reconstructed as escaping
 * leftover angle brackets (the published copy had them reduced to no-ops);
 * confirm against the author's original.
 * @param {string} txt HTML markup
 * @returns {string} the text with tags removed and stray "<" / ">" escaped
 */
function stripHTML(txt)
{
  txt = txt.replace(/<\/?(br|p|div|ul|ol|li|blockquote|pre)\s*\/?>/ig, " ");  //replace these with a space
  txt = txt.replace(/<.*?>/g, "");
  txt = txt.replace(/</g, "&lt;");
  txt = txt.replace(/>/g, "&gt;");
  return txt;
}

/**
 * Removes some scripting from txt: script elements, iframe elements, and
 * links whose quoted href starts with "javascript:" (the link text is kept).
 *
 * WARNING: this is nowhere near thorough. It is not a substitute for a real
 * HTML sanitizer and must not be relied on for untrusted input.
 * @param {string} txt HTML markup
 * @returns {string} the markup with the constructs above removed
 */
function sanitizeHTML(txt)
{
  //get rid of scripting
  txt = txt.replace(/<script.*?<\/script>?/ig, "");
  txt = txt.replace(/<a\s[^>]*href=(["'])\s*javascript:.*?\1.*?>(.*?)($|<\/a>)/ig, "$2");
  txt = txt.replace(/<iframe.*?<\/iframe>?/ig, "");
  //...what else?
  return txt;
}

/**
 * Fixes un-escaped ampersands: every "&" that does not begin a well-formed
 * character reference becomes "&amp;". Decimal (&#65;) and hexadecimal
 * (&#x41;) references and the HTML 4.01 entity names (case-sensitive) are
 * left untouched.
 * See http://www.w3.org/TR/html401/sgml/entities.html
 * @param {string} txt text to fix
 * @returns {string} the fixed text ("" for empty/missing input)
 */
function fixAmphersands(txt)
{
  if(!txt) return "";

  var validEntityNames = new RegExp("^(?:"+
    //markup-significant and internationalization characters
    "quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
    "rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
    //ISO 8859-1 characters
    "nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
    "plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
    "Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
    "Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
    "Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
    "igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
    "uacute|ucirc|uuml|yacute|thorn|yuml|"+
    //symbols, mathematical symbols, and Greek letters
    "fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
    "Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
    "kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
    "upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
    "darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
    "sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
    "le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
    "loz|spades|clubs|hearts|diams)$");

  //Visit every ampersand exactly once; the optional group captures whatever
  //follows it when that text is shaped like a character reference.
  return txt.replace(/&(#x[0-9a-f]+;|#[0-9]+;|[a-z0-9]+;)?/ig, function(match, reference){
    if(reference)
    {
      if(reference.charAt(0) == "#") return match;  //numeric reference
      if(validEntityNames.test(reference.slice(0, -1))) return match;  //known entity name
    }
    return "&amp;"+match.slice(1);
  });
}

/**
 * Turns every "&amp;" (matched case-insensitively) back into a bare "&".
 * @param {string} txt text to convert
 * @returns {string} the converted text ("" for empty/missing input)
 */
function unFixAmphersands(txt)
{
  if(!txt) return "";
  return txt.replace(/&amp;/ig, "&");
}
Revision: 8173
Updated Code
at September 7, 2008 01:00 by wizard04
Updated Code
/**
 * Escapes a string for use in HTML body text.
 * @param {string} str text to escape
 * @returns {string} the escaped text
 */
function escapeToHTML(str){
  //ampersands first, so the entities added afterwards are not re-escaped
  return str.replace(/&/g, "&amp;").replace(/>/g, "&gt;").replace(/</g, "&lt;");
}

/**
 * Reverses escapeToHTML().
 * @param {string} str text produced by escapeToHTML()
 * @returns {string} the unescaped text
 */
function unescapeFromHTML(str){
  //ampersands last, so "&amp;lt;" becomes "&lt;" rather than "<"
  return str.replace(/&gt;/g, ">").replace(/&lt;/g, "<").replace(/&amp;/g, "&");
}

/**
 * Escapes a string for use in a double-quoted HTML attribute value.
 * @param {string} str text to escape
 * @returns {string} the escaped text
 */
function escapeToHTMLAttribute(str){
  return str.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/>/g, "&gt;").replace(/</g, "&lt;");
}

/**
 * Reverses escapeToHTMLAttribute().
 * @param {string} str text produced by escapeToHTMLAttribute()
 * @returns {string} the unescaped text
 */
function unescapeFromHTMLAttribute(str){
  return str.replace(/&quot;/g, "\"").replace(/&gt;/g, ">").replace(/&lt;/g, "<").replace(/&amp;/g, "&");
}

/**
 * Removes all tags from txt. Common block-level tags and <br> are replaced
 * with a space so the surrounding words do not run together.
 *
 * NOTE(review): the last two replacements were reconstructed as escaping
 * leftover angle brackets (the published copy had them reduced to no-ops);
 * confirm against the author's original.
 * @param {string} txt HTML markup
 * @returns {string} the text with tags removed and stray "<" / ">" escaped
 */
function stripHTML(txt)
{
  txt = txt.replace(/<\/?(br|p|div|ul|ol|li|blockquote|pre)\s*\/?>/ig, " ");  //replace these with a space
  txt = txt.replace(/<.*?>/g, "");
  txt = txt.replace(/</g, "&lt;");
  txt = txt.replace(/>/g, "&gt;");
  return txt;
}

/**
 * Removes some scripting from txt: script elements, iframe elements, and
 * links whose quoted href starts with "javascript:" (the link text is kept).
 *
 * WARNING: this is nowhere near thorough. It is not a substitute for a real
 * HTML sanitizer and must not be relied on for untrusted input.
 * @param {string} txt HTML markup
 * @returns {string} the markup with the constructs above removed
 */
function sanitizeHTML(txt)
{
  //get rid of scripting
  txt = txt.replace(/<script.*?<\/script>?/ig, "");
  txt = txt.replace(/<a\s[^>]*href=(["'])\s*javascript:.*?\1.*?>(.*?)($|<\/a>)/ig, "$2");
  txt = txt.replace(/<iframe.*?<\/iframe>?/ig, "");
  //...what else?
  return txt;
}

/**
 * Fixes un-escaped ampersands: every "&" that does not begin a well-formed
 * character reference becomes "&amp;". Decimal (&#65;) and hexadecimal
 * (&#x41;) references and the HTML 4.01 entity names (case-sensitive) are
 * left untouched.
 * See http://www.w3.org/TR/html401/sgml/entities.html
 * @param {string} txt text to fix
 * @returns {string} the fixed text ("" for empty/missing input)
 */
function fixAmphersands(txt)
{
  if(!txt) return "";

  var validEntityNames = new RegExp("^(?:"+
    //markup-significant and internationalization characters
    "quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
    "rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
    //ISO 8859-1 characters
    "nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
    "plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
    "Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
    "Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
    "Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
    "igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
    "uacute|ucirc|uuml|yacute|thorn|yuml|"+
    //symbols, mathematical symbols, and Greek letters
    "fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
    "Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
    "kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
    "upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
    "darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
    "sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
    "le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
    "loz|spades|clubs|hearts|diams)$");

  //Visit every ampersand exactly once; the optional group captures whatever
  //follows it when that text is shaped like a character reference.
  return txt.replace(/&(#x[0-9a-f]+;|#[0-9]+;|[a-z0-9]+;)?/ig, function(match, reference){
    if(reference)
    {
      if(reference.charAt(0) == "#") return match;  //numeric reference
      if(validEntityNames.test(reference.slice(0, -1))) return match;  //known entity name
    }
    return "&amp;"+match.slice(1);
  });
}

/**
 * Turns every "&amp;" (matched case-insensitively) back into a bare "&".
 * @param {string} txt text to convert
 * @returns {string} the converted text ("" for empty/missing input)
 */
function unFixAmphersands(txt)
{
  if(!txt) return "";
  return txt.replace(/&amp;/ig, "&");
}
Revision: 8172
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at September 5, 2008 13:43 by wizard04
Initial Code
/**
 * Escapes a string for use in HTML body text.
 * @param {string} str text to escape
 * @returns {string} the escaped text
 */
function escapeToHTML(str){
  //ampersands first, so the entities added afterwards are not re-escaped
  return str.replace(/&/g, "&amp;").replace(/>/g, "&gt;").replace(/</g, "&lt;");
}

/**
 * Reverses escapeToHTML().
 * @param {string} str text produced by escapeToHTML()
 * @returns {string} the unescaped text
 */
function unescapeFromHTML(str){
  //ampersands last, so "&amp;lt;" becomes "&lt;" rather than "<"
  return str.replace(/&gt;/g, ">").replace(/&lt;/g, "<").replace(/&amp;/g, "&");
}

/**
 * Escapes a string for use in a double-quoted HTML attribute value.
 * @param {string} str text to escape
 * @returns {string} the escaped text
 */
function escapeToHTMLAttribute(str){
  return str.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/>/g, "&gt;").replace(/</g, "&lt;");
}

/**
 * Reverses escapeToHTMLAttribute().
 * @param {string} str text produced by escapeToHTMLAttribute()
 * @returns {string} the unescaped text
 */
function unescapeFromHTMLAttribute(str){
  return str.replace(/&quot;/g, "\"").replace(/&gt;/g, ">").replace(/&lt;/g, "<").replace(/&amp;/g, "&");
}

/**
 * Removes all tags from txt. Common block-level tags and <br> are replaced
 * with a space so the surrounding words do not run together.
 *
 * NOTE(review): the last two replacements were reconstructed as escaping
 * leftover angle brackets (the published copy had them reduced to no-ops);
 * confirm against the author's original.
 * @param {string} txt HTML markup
 * @returns {string} the text with tags removed and stray "<" / ">" escaped
 */
function stripHTML(txt)
{
  txt = txt.replace(/<\/?(br|p|div|ul|ol|li|blockquote|pre)\s*\/?>/ig, " ");  //replace these with a space
  txt = txt.replace(/<.*?>/g, "");
  txt = txt.replace(/</g, "&lt;");
  txt = txt.replace(/>/g, "&gt;");
  return txt;
}

/**
 * Removes some scripting from txt: script elements, iframe elements, and
 * links whose quoted href starts with "javascript:" (the link text is kept).
 *
 * WARNING: this is nowhere near thorough. It is not a substitute for a real
 * HTML sanitizer and must not be relied on for untrusted input.
 * @param {string} txt HTML markup
 * @returns {string} the markup with the constructs above removed
 */
function sanitizeHTML(txt)
{
  //get rid of scripting
  txt = txt.replace(/<script.*?<\/script>?/ig, "");
  txt = txt.replace(/<a\s[^>]*href=(["'])\s*javascript:.*?\1.*?>(.*?)($|<\/a>)/ig, "$2");
  txt = txt.replace(/<iframe.*?<\/iframe>?/ig, "");
  //...what else?
  return txt;
}

/**
 * Fixes un-escaped ampersands: every "&" that does not begin a well-formed
 * character reference becomes "&amp;". Decimal (&#65;) and hexadecimal
 * (&#x41;) references and the HTML 4.01 entity names (case-sensitive) are
 * left untouched.
 * See http://www.w3.org/TR/html401/sgml/entities.html
 * @param {string} txt text to fix
 * @returns {string} the fixed text ("" for empty/missing input)
 */
function fixAmphersands(txt)
{
  if(!txt) return "";

  //one anchored alternation stands in for a per-name switch statement
  var validEntityNames = new RegExp("^(?:"+
    //markup-significant and internationalization characters
    "quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
    "rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
    //ISO 8859-1 characters
    "nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
    "plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
    "Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
    "Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
    "Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
    "igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
    "uacute|ucirc|uuml|yacute|thorn|yuml|"+
    //symbols, mathematical symbols, and Greek letters
    "fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
    "Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
    "kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
    "upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
    "darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
    "sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
    "le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
    "loz|spades|clubs|hearts|diams)$");

  //Visit every ampersand exactly once; the optional group captures whatever
  //follows it when that text is shaped like a character reference.
  return txt.replace(/&(#x[0-9a-f]+;|#[0-9]+;|[a-z0-9]+;)?/ig, function(match, reference){
    if(reference)
    {
      if(reference.charAt(0) == "#") return match;  //numeric reference
      if(validEntityNames.test(reference.slice(0, -1))) return match;  //known entity name
    }
    return "&amp;"+match.slice(1);
  });
}

/**
 * Turns every "&amp;" (matched case-insensitively) back into a bare "&".
 * @param {string} txt text to convert
 * @returns {string} the converted text ("" for empty/missing input)
 */
function unFixAmphersands(txt)
{
  if(!txt) return "";
  return txt.replace(/&amp;/ig, "&");
}
Initial URL
Initial Description
Initial Title
URL and HTML Encoding
Initial Tags
url, javascript, html
Initial Language
JavaScript