Return to Snippet

Revision: 8179
at September 17, 2011 06:06 by wizard04


Updated Code
/*****************************************
 * String encoding/decoding functions
 * 
 * This work is licensed under a Creative Commons Attribution 3.0 Unported License
 * http://creativecommons.org/licenses/by/3.0/
 *
 * Author: Andy Harrison, http://dragonzreef.com/
 * Date: 16 September 2011
 *****************************************/

//String wrappers around the built-in URI encoding/decoding functions.
(function(proto)
{
	//percent-encodes the string for use as a single URL component
	proto.encodeToUrlPart = function()
	{
		return encodeURIComponent(this);
	};
	//reverses encodeToUrlPart
	proto.decodeFromUrlPart = function()
	{
		return decodeURIComponent(this);
	};
	//percent-encodes the string as a complete URL (reserved characters are left alone)
	proto.encodeToUrl = function()
	{
		return encodeURI(this);
	};
	//reverses encodeToUrl
	proto.decodeFromUrl = function()
	{
		return decodeURI(this);
	};
})(String.prototype);

//HTML-escapes ampersands that do not designate a character reference.
//An ampersand is left alone only when it starts a terminated numeric reference (e.g. &#65; or
// &#x41;) or a named reference whose name is in the HTML 4.01 entity list below; every other
// "&" is rewritten as "&amp;".
//Returns the escaped string ("" for an empty string).
String.prototype.encodeIncongruousAmphersands = function()
{
	var str = this.toString();
	if(!str) return "";
	
	//Escape ampersands that are obviously not designating character references: the text that
	// follows stops looking like a hex/decimal/named reference before a ";" is reached.
	//The following text is only inspected with a lookahead, never consumed, so an "&" that
	// itself ends the previous candidate (as in "a&b&c") is still examined.
	str = str.replace(/&(?=#x[A-F0-9]*(?:[^A-F0-9;]|$))/ig, "&amp;");
	str = str.replace(/&(?=#(?!x)[0-9]*(?:[^0-9;]|$))/ig, "&amp;");
	str = str.replace(/&(?=(?!#)[a-z0-9]*(?:[^a-z0-9;]|$))/ig, "&amp;");
	
	//for substrings formatted as character entity references, make sure entity names are valid
	//see http://www.w3.org/TR/html401/sgml/entities.html
	
	var validEntityNames = ""+
		//markup-significant and internationalization characters
		"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
		"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
		//ISO 8859-1 characters
		"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
		"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
		"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
		"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
		"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
		"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
		"uacute|ucirc|uuml|yacute|thorn|yuml|"+
		//symbols, mathematical symbols, and Greek letters
		"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
		"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
		"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
		"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
		"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
		"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
		"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
		"loz|spades|clubs|hearts|diams";
	
	//Matches "&name;" where name is alphanumeric but not one of the valid names (case-sensitive).
	//The name list sits in a non-capturing group so $1 is the "name;" text to keep.
	var rxp = new RegExp("&(?!(?:"+validEntityNames+");)([a-zA-Z0-9]+;)", "g");
	str = str.replace(rxp, "&amp;$1");
	
	return str;
};
//HTML-escapes the string: & < > " and ' are replaced with character references.
//keepValidEntities: when truthy, ampersands that already begin a character reference are kept
// (only the others are escaped, via encodeIncongruousAmphersands); otherwise every "&" is escaped.
//Returns the escaped string ("" for an empty string).
String.prototype.encodeToHtml = function(keepValidEntities)
{
	var str = this.toString();
	if(!str) return "";
	//ampersands must be handled first so the entities added below are not escaped again
	str = keepValidEntities ? str.encodeIncongruousAmphersands() : str.replace(/&/g, "&amp;");
	str = str.replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
	return str;
};
//Unescapes all entities in the string, not just the markup-significant characters.
//The string is parsed as the innerHTML of a detached div and the nodeValue of the div's first
// child is returned. Returns "" for an empty string or when parsing produces no child node
// (previously that case threw a TypeError).
String.prototype.decodeFromHtml = function()
{
	var str = this.toString();
	if(!str) return "";
	var tmp = document.createElement("div");
	tmp.innerHTML = str;
	var first = tmp.firstChild;
	return first ? first.nodeValue : "";
};

//Escapes the string so it can be placed inside a JavaScript string literal in embedded or
// inline code. Returns "" for an empty string.
//See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
String.prototype.encodeToJavaScriptString = function()
{
	var source = this.toString();
	if(!source) return "";
	
	var escapes = {
		"\\": "\\\\",
		//quotes and line-breaking characters, to prevent "escape from the quote" attacks
		"'": "\\u0027",
		"\"": "\\u0022",
		"\t": "\\t",			//tab
		"\n": "\\n",			//line feed
		"\r": "\\r",			//carriage return
		"\u0085": "\\u0085",	//next line
		"\u2028": "\\u2028",	//line separator
		"\u2029": "\\u2029",	//paragraph separator
		//prevent the string from closing the tag
		"<": "\\u003C",
		">": "\\u003E",
		//escaped just in case ("defense-in-depth")
		"&": "\\u0026",
		"=": "\\u003D"
	};
	
	//every special character is rewritten independently, so one pass is enough
	return source.replace(/[\\'"\t\n\r\u0085\u2028\u2029<>&=]/g, function(ch){ return escapes[ch]; });
};

Revision: 8178
at September 12, 2008 16:14 by wizard04


Updated Code
//Replaces & < > " and ' in str with HTML character references and returns the result.
function escapeToHTML(str)
{
	var entityFor = { "&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;", "'": "&#39;" };
	return str.replace(/[&<>"']/g, function(ch){ return entityFor[ch]; });
}
//Reverses escapeToHTML: turns &lt; &gt; &quot; &#39; and &amp; back into their characters.
//Each reference is decoded exactly once, so "&amp;lt;" becomes "&lt;" rather than "<".
function unescapeFromHTML(str)
{
	var charFor = { "&lt;": "<", "&gt;": ">", "&quot;": "\"", "&#39;": "'", "&amp;": "&" };
	return str.replace(/&(?:lt|gt|quot|#39|amp);/g, function(entity){ return charFor[entity]; });
}

//Escapes str for use inside a JavaScript string literal within a script tag or an inline event
// handler (e.g., an onclick attribute).
//See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
function escapeToScript(str)
{
	var escapes = {
		"\\": "\\\\",
		//line-breaking characters and quotes, to prevent "escape from the quote" attacks
		"\t": "\\t",
		"\n": "\\n",
		"\r": "\\r",
		"\u0085": "\\u0085",
		"\u2028": "\\u2028",
		"\u2029": "\\u2029",
		"'": "\\u0027",
		"\"": "\\u0022",
		//prevent the string from closing the tag
		"<": "\\u003C",
		">": "\\u003E",
		//escaped just in case ("defense-in-depth")
		"&": "\\u0026",
		"=": "\\u003D"
	};
	
	//every special character is rewritten independently, so one pass is enough
	return str.replace(/[\\\t\n\r\u0085\u2028\u2029'"<>&=]/g, function(ch){ return escapes[ch]; });
}

//Fixes un-escaped ampersands: rewrites as "&amp;" the ampersands in txt that do not start a
// well-formed character reference. Returns the fixed text, or "" when txt is falsy.
//Step 1 (the loop) looks at "&#x...", "&#..." and "&..." sequences running up to the next ";"
// or the end of the string, and escapes those containing a character that cannot be part of
// that kind of reference.
//Step 2 escapes "&name;" sequences whose name is not in the HTML 4.01 entity list.
//NOTE(review): a single-digit reference such as "&#5;" appears to match the second pattern in
// step 1 and so gets escaped - confirm whether that is intended.
function fixAmphersands(txt)
{
	if(!txt) return "";
	var newText = txt;
	//repeat until a pass changes nothing; a match extends to the next ";" and may span a later
	// "&", which presumably is why a single pass is not enough
	do
	{
		txt = newText;
		//order matters here!
		//hexadecimal form: "&#x" followed by text that is not purely hex digits
		newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&amp;#x$1$7");
		//decimal form: "&#" not followed by "x"
		newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&amp;#$1$2$8");
		//named form: "&" not followed by "#"
		newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&amp;$1$2$8");
	}while(txt != newText)
	
	//make sure entity names are valid
	//see http://www.w3.org/TR/html401/sgml/entities.html
	
	//anchored with ^ and $ so the whole name must match; no "i" flag, so names are case-sensitive
	var validEntityNames = new RegExp("^("+
		//markup-significant and internationalization characters
		"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
		"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
		//ISO 8859-1 characters
		"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
		"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
		"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
		"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
		"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
		"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
		"uacute|ucirc|uuml|yacute|thorn|yuml|"+
		//symbols, mathematical symbols, and Greek letters
		"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
		"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
		"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
		"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
		"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
		"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
		"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
		"loz|spades|clubs|hearts|diams)$");
	
	//every "&name;" candidate left in the text (null when there is none)
	var mtch = txt.match(/&[a-z0-9]+;/ig);
	var rxp;
	for(var i=0; mtch && i<mtch.length; i++)
	{
		//slice off the leading "&" and trailing ";" to get the bare name
		if(!validEntityNames.test(mtch[i].slice(1,mtch[i].length-1)))
		{
			//escape the ampersand
			//the matched text holds only "&", letters, digits and ";", so it is usable as a pattern as-is
			rxp = new RegExp(mtch[i], "g");
			txt = txt.replace(rxp, "&amp;"+mtch[i].slice(1));
		}
	}
	
	return txt;
}

//Returns the literal text for str by parsing it as the innerHTML of a temporary div and reading
// the value of the resulting first child node ("" when there is no child node).
//Intended as a workaround for cases where setting an element attribute (e.g., title) with
// javascript, or creating a text node with document.createTextNode, escapes all ampersands in
// the string.
function literalText(str)
{
	var holder = document.createElement("div");
	holder.innerHTML = str;
	var first = holder.firstChild;
	if(!first) return "";
	return first.nodeValue;
}

Revision: 8177
at September 12, 2008 16:11 by wizard04


Updated Code
//Replaces & < > " and ' in str with HTML character references and returns the result.
function escapeToHTML(str)
{
	var entityFor = { "&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;", "'": "&#39;" };
	return str.replace(/[&<>"']/g, function(ch){ return entityFor[ch]; });
}
//Reverses escapeToHTML: turns &lt; &gt; &quot; &#39; and &amp; back into their characters.
//Each reference is decoded exactly once, so "&amp;lt;" becomes "&lt;" rather than "<".
function unescapeFromHTML(str)
{
	var charFor = { "&lt;": "<", "&gt;": ">", "&quot;": "\"", "&#39;": "'", "&amp;": "&" };
	return str.replace(/&(?:lt|gt|quot|#39|amp);/g, function(entity){ return charFor[entity]; });
}

//Escapes str for use inside a JavaScript string literal within a script tag or an inline event
// handler (e.g., an onclick attribute).
//See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
function escapeToScript(str)
{
	var escapes = {
		"\\": "\\\\",
		//line-breaking characters and quotes, to prevent "escape from the quote" attacks
		"\t": "\\t",
		"\n": "\\n",
		"\r": "\\r",
		"\u0085": "\\u0085",
		"\u2028": "\\u2028",
		"\u2029": "\\u2029",
		"'": "\\u0027",
		"\"": "\\u0022",
		//prevent the string from closing the tag
		"<": "\\u003C",
		">": "\\u003E",
		//escaped just in case ("defense-in-depth")
		"&": "\\u0026",
		"=": "\\u003D"
	};
	
	//every special character is rewritten independently, so one pass is enough
	return str.replace(/[\\\t\n\r\u0085\u2028\u2029'"<>&=]/g, function(ch){ return escapes[ch]; });
}

//Fixes un-escaped ampersands: rewrites as "&amp;" the ampersands in txt that do not start a
// well-formed character reference. Returns the fixed text, or "" when txt is falsy.
//Step 1 (the loop) looks at "&#x...", "&#..." and "&..." sequences running up to the next ";"
// or the end of the string, and escapes those containing a character that cannot be part of
// that kind of reference.
//Step 2 escapes "&name;" sequences whose name is not in the HTML 4.01 entity list.
//NOTE(review): a single-digit reference such as "&#5;" appears to match the second pattern in
// step 1 and so gets escaped - confirm whether that is intended.
function fixAmphersands(txt)
{
	if(!txt) return "";
	var newText = txt;
	//repeat until a pass changes nothing; a match extends to the next ";" and may span a later
	// "&", which presumably is why a single pass is not enough
	do
	{
		txt = newText;
		//order matters here!
		//hexadecimal form: "&#x" followed by text that is not purely hex digits
		newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&amp;#x$1$7");
		//decimal form: "&#" not followed by "x"
		newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&amp;#$1$2$8");
		//named form: "&" not followed by "#"
		newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&amp;$1$2$8");
	}while(txt != newText)
	
	//make sure entity names are valid
	//see http://www.w3.org/TR/html401/sgml/entities.html
	
	//anchored with ^ and $ so the whole name must match; no "i" flag, so names are case-sensitive
	var validEntityNames = new RegExp("^("+
		//markup-significant and internationalization characters
		"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
		"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
		//ISO 8859-1 characters
		"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
		"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
		"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
		"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
		"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
		"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
		"uacute|ucirc|uuml|yacute|thorn|yuml|"+
		//symbols, mathematical symbols, and Greek letters
		"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
		"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
		"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
		"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
		"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
		"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
		"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
		"loz|spades|clubs|hearts|diams)$");
	
	//every "&name;" candidate left in the text (null when there is none)
	var mtch = txt.match(/&[a-z0-9]+;/ig);
	var rxp;
	for(var i=0; mtch && i<mtch.length; i++)
	{
		//slice off the leading "&" and trailing ";" to get the bare name
		if(!validEntityNames.test(mtch[i].slice(1,mtch[i].length-1)))
		{
			//escape the ampersand
			//the matched text holds only "&", letters, digits and ";", so it is usable as a pattern as-is
			rxp = new RegExp(mtch[i], "g");
			txt = txt.replace(rxp, "&amp;"+mtch[i].slice(1));
		}
	}
	
	return txt;
}

//Returns the literal text for str by parsing it as the innerHTML of a temporary div and reading
// the value of the resulting first child node ("" when there is no child node).
//Intended as a workaround for cases where setting an element attribute with javascript, or
// creating a text node with document.createTextNode, escapes all ampersands in the string.
function literalText(str)
{
	var holder = document.createElement("div");
	holder.innerHTML = str;
	var first = holder.firstChild;
	if(!first) return "";
	return first.nodeValue;
}

Revision: 8176
at September 12, 2008 16:11 by wizard04


Updated Code
//Replaces & < > " and ' in str with HTML character references and returns the result.
//See http://code.google.com/p/doctype/wiki/ArticleXSSInAttributes
//and http://code.google.com/p/doctype/wiki/ArticleXSSInBodyText
function escapeToHTML(str)
{
	var entityFor = { "&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;", "'": "&#39;" };
	return str.replace(/[&<>"']/g, function(ch){ return entityFor[ch]; });
}
//Reverses escapeToHTML: turns &lt; &gt; &quot; &#39; and &amp; back into their characters.
//Each reference is decoded exactly once, so "&amp;lt;" becomes "&lt;" rather than "<".
function unescapeFromHTML(str)
{
	var charFor = { "&lt;": "<", "&gt;": ">", "&quot;": "\"", "&#39;": "'", "&amp;": "&" };
	return str.replace(/&(?:lt|gt|quot|#39|amp);/g, function(entity){ return charFor[entity]; });
}

//Escapes str for use inside a JavaScript string literal within a script tag or an inline event
// handler (e.g., an onclick attribute).
//See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
function escapeToScript(str)
{
	var escapes = {
		"\\": "\\\\",
		//line-breaking characters and quotes, to prevent "escape from the quote" attacks
		"\t": "\\t",
		"\n": "\\n",
		"\r": "\\r",
		"\u0085": "\\u0085",
		"\u2028": "\\u2028",
		"\u2029": "\\u2029",
		"'": "\\u0027",
		"\"": "\\u0022",
		//prevent the string from closing the tag
		"<": "\\u003C",
		">": "\\u003E",
		//escaped just in case ("defense-in-depth")
		"&": "\\u0026",
		"=": "\\u003D"
	};
	
	//every special character is rewritten independently, so one pass is enough
	return str.replace(/[\\\t\n\r\u0085\u2028\u2029'"<>&=]/g, function(ch){ return escapes[ch]; });
}

//Fixes un-escaped ampersands: rewrites as "&amp;" the ampersands in txt that do not start a
// well-formed character reference. Returns the fixed text, or "" when txt is falsy.
//Step 1 (the loop) looks at "&#x...", "&#..." and "&..." sequences running up to the next ";"
// or the end of the string, and escapes those containing a character that cannot be part of
// that kind of reference.
//Step 2 escapes "&name;" sequences whose name is not in the HTML 4.01 entity list.
//NOTE(review): a single-digit reference such as "&#5;" appears to match the second pattern in
// step 1 and so gets escaped - confirm whether that is intended.
function fixAmphersands(txt)
{
	if(!txt) return "";
	var newText = txt;
	//repeat until a pass changes nothing; a match extends to the next ";" and may span a later
	// "&", which presumably is why a single pass is not enough
	do
	{
		txt = newText;
		//order matters here!
		//hexadecimal form: "&#x" followed by text that is not purely hex digits
		newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&amp;#x$1$7");
		//decimal form: "&#" not followed by "x"
		newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&amp;#$1$2$8");
		//named form: "&" not followed by "#"
		newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&amp;$1$2$8");
	}while(txt != newText)
	
	//make sure entity names are valid
	//see http://www.w3.org/TR/html401/sgml/entities.html
	
	//anchored with ^ and $ so the whole name must match; no "i" flag, so names are case-sensitive
	var validEntityNames = new RegExp("^("+
		//markup-significant and internationalization characters
		"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
		"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
		//ISO 8859-1 characters
		"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
		"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
		"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
		"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
		"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
		"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
		"uacute|ucirc|uuml|yacute|thorn|yuml|"+
		//symbols, mathematical symbols, and Greek letters
		"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
		"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
		"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
		"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
		"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
		"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
		"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
		"loz|spades|clubs|hearts|diams)$");
	
	//every "&name;" candidate left in the text (null when there is none)
	var mtch = txt.match(/&[a-z0-9]+;/ig);
	var rxp;
	for(var i=0; mtch && i<mtch.length; i++)
	{
		//slice off the leading "&" and trailing ";" to get the bare name
		if(!validEntityNames.test(mtch[i].slice(1,mtch[i].length-1)))
		{
			//escape the ampersand
			//the matched text holds only "&", letters, digits and ";", so it is usable as a pattern as-is
			rxp = new RegExp(mtch[i], "g");
			txt = txt.replace(rxp, "&amp;"+mtch[i].slice(1));
		}
	}
	
	return txt;
}

//Returns the literal text for str by parsing it as the innerHTML of a temporary div and reading
// the value of the resulting first child node ("" when there is no child node).
//Intended as a workaround for cases where setting an element attribute with javascript, or
// creating a text node with document.createTextNode, escapes all ampersands in the string.
function literalText(str)
{
	var holder = document.createElement("div");
	holder.innerHTML = str;
	var first = holder.firstChild;
	if(!first) return "";
	return first.nodeValue;
}

Revision: 8175
at September 12, 2008 16:09 by wizard04


Updated Code
//Replaces & < > " and ' in str with HTML character references and returns the result.
//See http://code.google.com/p/doctype/wiki/ArticleXSSInAttributes
//and http://code.google.com/p/doctype/wiki/ArticleXSSInBodyText
function escapeToHTML(str)
{
	var entityFor = { "&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;", "'": "&#39;" };
	return str.replace(/[&<>"']/g, function(ch){ return entityFor[ch]; });
}
//Reverses escapeToHTML: turns &lt; &gt; &quot; &#39; and &amp; back into their characters.
//Each reference is decoded exactly once, so "&amp;lt;" becomes "&lt;" rather than "<".
function unescapeFromHTML(str)
{
	var charFor = { "&lt;": "<", "&gt;": ">", "&quot;": "\"", "&#39;": "'", "&amp;": "&" };
	return str.replace(/&(?:lt|gt|quot|#39|amp);/g, function(entity){ return charFor[entity]; });
}

//Escapes str for use inside a JavaScript string literal within a script tag or an inline event
// handler (e.g., an onclick attribute).
//See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
function escapeToScript(str)
{
	var escapes = {
		"\\": "\\\\",
		//line-breaking characters and quotes, to prevent "escape from the quote" attacks
		"\t": "\\t",
		"\n": "\\n",
		"\r": "\\r",
		"\u0085": "\\u0085",
		"\u2028": "\\u2028",
		"\u2029": "\\u2029",
		"'": "\\u0027",
		"\"": "\\u0022",
		//prevent the string from closing the tag
		"<": "\\u003C",
		">": "\\u003E",
		//escaped just in case ("defense-in-depth")
		"&": "\\u0026",
		"=": "\\u003D"
	};
	
	//every special character is rewritten independently, so one pass is enough
	return str.replace(/[\\\t\n\r\u0085\u2028\u2029'"<>&=]/g, function(ch){ return escapes[ch]; });
}

//Removes all tags from txt and returns the remaining text.
//Block-level/line-break tags (br, p, div, ul, ol, li, blockquote, pre), with or without
// attributes, are replaced by a single space so adjacent words do not run together; every other
// tag is deleted. Any "<" or ">" left over afterwards is escaped as &lt; / &gt;.
function stripHTML(txt)
{
	//\b keeps "p" from matching "<param>" and "li" from matching "<link>"
	txt = txt.replace(/<\/?(?:br|p|div|ul|ol|li|blockquote|pre)\b[^>]*>/ig, " ");	//replace these with a space
	//[^>]* (unlike .*?) also matches line breaks, so tags split across lines are removed too
	txt = txt.replace(/<[^>]*>/g, "");
	txt = txt.replace(/</g, "&lt;");
	txt = txt.replace(/>/g, "&gt;");
	return txt;
}

//Fixes un-escaped ampersands: rewrites as "&amp;" the ampersands in txt that do not start a
// well-formed character reference. Returns the fixed text, or "" when txt is falsy.
//Step 1 (the loop) looks at "&#x...", "&#..." and "&..." sequences running up to the next ";"
// or the end of the string, and escapes those containing a character that cannot be part of
// that kind of reference.
//Step 2 escapes "&name;" sequences whose name is not in the HTML 4.01 entity list.
//NOTE(review): a single-digit reference such as "&#5;" appears to match the second pattern in
// step 1 and so gets escaped - confirm whether that is intended.
function fixAmphersands(txt)
{
	if(!txt) return "";
	var newText = txt;
	//repeat until a pass changes nothing; a match extends to the next ";" and may span a later
	// "&", which presumably is why a single pass is not enough
	do
	{
		txt = newText;
		//order matters here!
		//hexadecimal form: "&#x" followed by text that is not purely hex digits
		newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&amp;#x$1$7");
		//decimal form: "&#" not followed by "x"
		newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&amp;#$1$2$8");
		//named form: "&" not followed by "#"
		newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&amp;$1$2$8");
	}while(txt != newText)
	
	//make sure entity names are valid
	//see http://www.w3.org/TR/html401/sgml/entities.html
	
	//anchored with ^ and $ so the whole name must match; no "i" flag, so names are case-sensitive
	var validEntityNames = new RegExp("^("+
		//markup-significant and internationalization characters
		"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
		"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
		//ISO 8859-1 characters
		"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
		"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
		"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
		"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
		"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
		"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
		"uacute|ucirc|uuml|yacute|thorn|yuml|"+
		//symbols, mathematical symbols, and Greek letters
		"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
		"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
		"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
		"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
		"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
		"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
		"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
		"loz|spades|clubs|hearts|diams)$");
	
	//every "&name;" candidate left in the text (null when there is none)
	var mtch = txt.match(/&[a-z0-9]+;/ig);
	var rxp;
	for(var i=0; mtch && i<mtch.length; i++)
	{
		//slice off the leading "&" and trailing ";" to get the bare name
		if(!validEntityNames.test(mtch[i].slice(1,mtch[i].length-1)))
		{
			//escape the ampersand
			//the matched text holds only "&", letters, digits and ";", so it is usable as a pattern as-is
			rxp = new RegExp(mtch[i], "g");
			txt = txt.replace(rxp, "&amp;"+mtch[i].slice(1));
		}
	}
	
	return txt;
}

//Returns the literal text for str by parsing it as the innerHTML of a temporary div and reading
// the value of the resulting first child node ("" when there is no child node).
//Intended as a workaround for cases where setting an element attribute with javascript, or
// creating a text node with document.createTextNode, escapes all ampersands in the string.
function literalText(str)
{
	var holder = document.createElement("div");
	holder.innerHTML = str;
	var first = holder.firstChild;
	if(!first) return "";
	return first.nodeValue;
}

Revision: 8174
at September 8, 2008 08:33 by wizard04


Updated Code
//Replaces & > and < in str with the HTML entities &amp; &gt; and &lt;.
function escapeToHTML(str)
{
	var entityFor = { "&": "&amp;", ">": "&gt;", "<": "&lt;" };
	return str.replace(/[&<>]/g, function(ch){ return entityFor[ch]; });
}
//Reverses escapeToHTML: turns &gt; &lt; and &amp; back into > < and &.
//Each entity is decoded exactly once, so "&amp;lt;" becomes "&lt;" rather than "<".
function unescapeFromHTML(str)
{
	var charFor = { "&gt;": ">", "&lt;": "<", "&amp;": "&" };
	return str.replace(/&(?:gt|lt|amp);/g, function(entity){ return charFor[entity]; });
}
//Escapes str for use in a double-quoted HTML attribute value: & " > and < become entities.
function escapeToHTMLAttribute(str)
{
	var entityFor = { "&": "&amp;", "\"": "&quot;", ">": "&gt;", "<": "&lt;" };
	return str.replace(/[&"<>]/g, function(ch){ return entityFor[ch]; });
}
//Reverses escapeToHTMLAttribute: turns &quot; &gt; &lt; and &amp; back into " > < and &.
//Each entity is decoded exactly once, so "&amp;quot;" becomes "&quot;" rather than a quote.
function unescapeFromHTMLAttribute(str)
{
	var charFor = { "&quot;": "\"", "&gt;": ">", "&lt;": "<", "&amp;": "&" };
	return str.replace(/&(?:quot|gt|lt|amp);/g, function(entity){ return charFor[entity]; });
}

//Removes all tags from txt and returns the remaining text.
//Block-level/line-break tags (br, p, div, ul, ol, li, blockquote, pre), with or without
// attributes, are replaced by a single space so adjacent words do not run together; every other
// tag is deleted. Any "<" or ">" left over afterwards is escaped as &lt; / &gt;.
function stripHTML(txt)
{
	//\b keeps "p" from matching "<param>" and "li" from matching "<link>"
	txt = txt.replace(/<\/?(?:br|p|div|ul|ol|li|blockquote|pre)\b[^>]*>/ig, " ");	//replace these with a space
	//[^>]* (unlike .*?) also matches line breaks, so tags split across lines are removed too
	txt = txt.replace(/<[^>]*>/g, "");
	txt = txt.replace(/</g, "&lt;");
	txt = txt.replace(/>/g, "&gt;");
	return txt;
}

//Removes some scripting from txt: <script> and <iframe> elements are deleted, and links whose
// quoted href starts with "javascript:" are replaced by their link text.
//This is nowhere near thorough (e.g. event-handler attributes and unquoted hrefs are not
// handled), so do not rely on it as the only defense against untrusted markup.
function sanitizeHTML(txt)
{
	//get rid of scripting
	//[\s\S] is used instead of "." so that elements spanning several lines are matched too
	txt = txt.replace(/<script[\s\S]*?<\/script>?/ig, "");
	txt = txt.replace(/<a\s[^>]*href=(["'])\s*javascript:[\s\S]*?\1[\s\S]*?>([\s\S]*?)($|<\/a>)/ig, "$2");
	txt = txt.replace(/<iframe[\s\S]*?<\/iframe>?/ig, "");
	
	//...what else?
	
	return txt;
}

//Fixes un-escaped ampersands: rewrites as "&amp;" the ampersands in txt that do not start a
// well-formed character reference. Returns the fixed text, or "" when txt is falsy.
//Step 1 (the loop) looks at "&#x...", "&#..." and "&..." sequences running up to the next ";"
// or the end of the string, and escapes those containing a character that cannot be part of
// that kind of reference.
//Step 2 escapes "&name;" sequences whose name is not in the HTML 4.01 entity list.
//NOTE(review): a single-digit reference such as "&#5;" appears to match the second pattern in
// step 1 and so gets escaped - confirm whether that is intended.
function fixAmphersands(txt)
{
	if(!txt) return "";
	var newText = txt;
	//repeat until a pass changes nothing; a match extends to the next ";" and may span a later
	// "&", which presumably is why a single pass is not enough
	do
	{
		txt = newText;
		//order matters here!
		//hexadecimal form: "&#x" followed by text that is not purely hex digits
		newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&amp;#x$1$7");
		//decimal form: "&#" not followed by "x"
		newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&amp;#$1$2$8");
		//named form: "&" not followed by "#"
		newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&amp;$1$2$8");
	}while(txt != newText)
	
	//make sure entity names are valid
	//see http://www.w3.org/TR/html401/sgml/entities.html
	
	//anchored with ^ and $ so the whole name must match; no "i" flag, so names are case-sensitive
	var validEntityNames = new RegExp("^("+
		//markup-significant and internationalization characters
		"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
		"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
		//ISO 8859-1 characters
		"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
		"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
		"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
		"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
		"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
		"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
		"uacute|ucirc|uuml|yacute|thorn|yuml|"+
		//symbols, mathematical symbols, and Greek letters
		"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
		"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
		"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
		"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
		"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
		"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
		"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
		"loz|spades|clubs|hearts|diams)$");
	
	//every "&name;" candidate left in the text (null when there is none)
	var mtch = txt.match(/&[a-z0-9]+;/ig);
	var rxp;
	for(var i=0; mtch && i<mtch.length; i++)
	{
		//slice off the leading "&" and trailing ";" to get the bare name
		if(!validEntityNames.test(mtch[i].slice(1,mtch[i].length-1)))
		{
			//escape the ampersand
			//the matched text holds only "&", letters, digits and ";", so it is usable as a pattern as-is
			rxp = new RegExp(mtch[i], "g");
			txt = txt.replace(rxp, "&amp;"+mtch[i].slice(1));
		}
	}
	
	return txt;
}
//Turns every "&amp;" in txt (matched case-insensitively) back into "&".
//Returns "" when txt is falsy.
function unFixAmphersands(txt)
{
	var escapedAmpersand = /&amp;/ig;
	return txt ? txt.replace(escapedAmpersand, "&") : "";
}

Revision: 8173
at September 7, 2008 01:00 by wizard04


Updated Code
//Replaces & > and < in str with the HTML entities &amp; &gt; and &lt;.
function escapeToHTML(str)
{
	var entityFor = { "&": "&amp;", ">": "&gt;", "<": "&lt;" };
	return str.replace(/[&<>]/g, function(ch){ return entityFor[ch]; });
}
//Reverses escapeToHTML: turns &gt; &lt; and &amp; back into > < and &.
//Each entity is decoded exactly once, so "&amp;lt;" becomes "&lt;" rather than "<".
function unescapeFromHTML(str)
{
	var charFor = { "&gt;": ">", "&lt;": "<", "&amp;": "&" };
	return str.replace(/&(?:gt|lt|amp);/g, function(entity){ return charFor[entity]; });
}
//Escapes str for use in a double-quoted HTML attribute value: & " > and < become entities.
function escapeToHTMLAttribute(str)
{
	var entityFor = { "&": "&amp;", "\"": "&quot;", ">": "&gt;", "<": "&lt;" };
	return str.replace(/[&"<>]/g, function(ch){ return entityFor[ch]; });
}
//Reverses escapeToHTMLAttribute: turns &quot; &gt; &lt; and &amp; back into " > < and &.
//Each entity is decoded exactly once, so "&amp;quot;" becomes "&quot;" rather than a quote.
function unescapeFromHTMLAttribute(str)
{
	var charFor = { "&quot;": "\"", "&gt;": ">", "&lt;": "<", "&amp;": "&" };
	return str.replace(/&(?:quot|gt|lt|amp);/g, function(entity){ return charFor[entity]; });
}

//Removes all tags from txt and returns the remaining text.
//Block-level/line-break tags (br, p, div, ul, ol, li, blockquote, pre), with or without
// attributes, are replaced by a single space so adjacent words do not run together; every other
// tag is deleted. Any "<" or ">" left over afterwards is escaped as &lt; / &gt;.
function stripHTML(txt)
{
	//\b keeps "p" from matching "<param>" and "li" from matching "<link>"
	txt = txt.replace(/<\/?(?:br|p|div|ul|ol|li|blockquote|pre)\b[^>]*>/ig, " ");	//replace these with a space
	//[^>]* (unlike .*?) also matches line breaks, so tags split across lines are removed too
	txt = txt.replace(/<[^>]*>/g, "");
	txt = txt.replace(/</g, "&lt;");
	txt = txt.replace(/>/g, "&gt;");
	return txt;
}

//Removes some scripting from txt: <script> and <iframe> elements are deleted, and links whose
// quoted href starts with "javascript:" are replaced by their link text.
//This is nowhere near thorough (e.g. event-handler attributes and unquoted hrefs are not
// handled), so do not rely on it as the only defense against untrusted markup.
function sanitizeHTML(txt)
{
	//get rid of scripting
	//[\s\S] is used instead of "." so that elements spanning several lines are matched too
	txt = txt.replace(/<script[\s\S]*?<\/script>?/ig, "");
	txt = txt.replace(/<a\s[^>]*href=(["'])\s*javascript:[\s\S]*?\1[\s\S]*?>([\s\S]*?)($|<\/a>)/ig, "$2");
	txt = txt.replace(/<iframe[\s\S]*?<\/iframe>?/ig, "");
	
	//...what else?
	
	return txt;
}

//Fixes un-escaped ampersands: rewrites as "&amp;" the ampersands in txt that do not start a
// well-formed character reference. Returns the fixed text, or "" when txt is falsy.
//Step 1 (the loop) looks at "&#x...", "&#..." and "&..." sequences running up to the next ";"
// or the end of the string, and escapes those containing a character that cannot be part of
// that kind of reference.
//Step 2 escapes "&name;" sequences whose name is not in the HTML 4.01 entity list.
//NOTE(review): a single-digit reference such as "&#5;" appears to match the second pattern in
// step 1 and so gets escaped - confirm whether that is intended.
function fixAmphersands(txt)
{
	if(!txt) return "";
	var newText = txt;
	//repeat until a pass changes nothing; a match extends to the next ";" and may span a later
	// "&", which presumably is why a single pass is not enough
	do
	{
		txt = newText;
		//order matters here!
		//hexadecimal form: "&#x" followed by text that is not purely hex digits
		newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&amp;#x$1$7");
		//decimal form: "&#" not followed by "x"
		newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&amp;#$1$2$8");
		//named form: "&" not followed by "#"
		newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&amp;$1$2$8");
	}while(txt != newText)
	
	//make sure entity names are valid
	//see http://www.w3.org/TR/html401/sgml/entities.html
	
	//anchored with ^ and $ so the whole name must match; no "i" flag, so names are case-sensitive
	var validEntityNames = new RegExp("^("+
		//markup-significant and internationalization characters
		"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
		"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
		//ISO 8859-1 characters
		"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
		"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
		"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
		"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
		"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
		"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
		"uacute|ucirc|uuml|yacute|thorn|yuml|"+
		//symbols, mathematical symbols, and Greek letters
		"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
		"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
		"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
		"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
		"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
		"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
		"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
		"loz|spades|clubs|hearts|diams)$");
	
	//every "&name;" candidate left in the text (null when there is none)
	var mtch = txt.match(/&[a-z0-9]+;/ig);
	var rxp;
	for(var i=0; mtch && i<mtch.length; i++)
	{
		//slice off the leading "&" and trailing ";" to get the bare name
		if(!validEntityNames.test(mtch[i].slice(1,mtch[i].length-1)))
		{
			//escape the ampersand
			//the matched text holds only "&", letters, digits and ";", so it is usable as a pattern as-is
			rxp = new RegExp(mtch[i], "g");
			txt = txt.replace(rxp, "&amp;"+mtch[i].slice(1));
		}
	}

	
	return txt;
}
//Turns every "&amp;" in txt (matched case-insensitively) back into "&".
//Returns "" when txt is falsy.
function unFixAmphersands(txt)
{
	var escapedAmpersand = /&amp;/ig;
	return txt ? txt.replace(escapedAmpersand, "&") : "";
}

Revision: 8172
at September 5, 2008 13:43 by wizard04


Initial Code
//Replaces & > and < in str with the HTML entities &amp; &gt; and &lt;.
function escapeToHTML(str)
{
	var entityFor = { "&": "&amp;", ">": "&gt;", "<": "&lt;" };
	return str.replace(/[&<>]/g, function(ch){ return entityFor[ch]; });
}
//Reverses escapeToHTML: turns &gt; &lt; and &amp; back into > < and &.
//Each entity is decoded exactly once, so "&amp;lt;" becomes "&lt;" rather than "<".
function unescapeFromHTML(str)
{
	var charFor = { "&gt;": ">", "&lt;": "<", "&amp;": "&" };
	return str.replace(/&(?:gt|lt|amp);/g, function(entity){ return charFor[entity]; });
}
//Escapes str for use in a double-quoted HTML attribute value: & " > and < become entities.
function escapeToHTMLAttribute(str)
{
	var entityFor = { "&": "&amp;", "\"": "&quot;", ">": "&gt;", "<": "&lt;" };
	return str.replace(/[&"<>]/g, function(ch){ return entityFor[ch]; });
}
//Reverses escapeToHTMLAttribute: turns &quot; &gt; &lt; and &amp; back into " > < and &.
//Each entity is decoded exactly once, so "&amp;quot;" becomes "&quot;" rather than a quote.
function unescapeFromHTMLAttribute(str)
{
	var charFor = { "&quot;": "\"", "&gt;": ">", "&lt;": "<", "&amp;": "&" };
	return str.replace(/&(?:quot|gt|lt|amp);/g, function(entity){ return charFor[entity]; });
}

//Removes all tags from txt and returns the remaining text.
//Block-level/line-break tags (br, p, div, ul, ol, li, blockquote, pre), with or without
// attributes, are replaced by a single space so adjacent words do not run together; every other
// tag is deleted. Any "<" or ">" left over afterwards is escaped as &lt; / &gt;.
function stripHTML(txt)
{
	//\b keeps "p" from matching "<param>" and "li" from matching "<link>"
	txt = txt.replace(/<\/?(?:br|p|div|ul|ol|li|blockquote|pre)\b[^>]*>/ig, " ");	//replace these with a space
	//[^>]* (unlike .*?) also matches line breaks, so tags split across lines are removed too
	txt = txt.replace(/<[^>]*>/g, "");
	txt = txt.replace(/</g, "&lt;");
	txt = txt.replace(/>/g, "&gt;");
	return txt;
}

//Removes some scripting from txt: <script> and <iframe> elements are deleted, and links whose
// quoted href starts with "javascript:" are replaced by their link text.
//This is nowhere near thorough (e.g. event-handler attributes and unquoted hrefs are not
// handled), so do not rely on it as the only defense against untrusted markup.
function sanitizeHTML(txt)
{
	//get rid of scripting
	//[\s\S] is used instead of "." so that elements spanning several lines are matched too
	txt = txt.replace(/<script[\s\S]*?<\/script>?/ig, "");
	txt = txt.replace(/<a\s[^>]*href=(["'])\s*javascript:[\s\S]*?\1[\s\S]*?>([\s\S]*?)($|<\/a>)/ig, "$2");
	txt = txt.replace(/<iframe[\s\S]*?<\/iframe>?/ig, "");
	
	//...what else?
	
	return txt;
}

//Fixes un-escaped ampersands: every "&" in txt that does not begin a well-formed character
//reference is replaced by "&amp;".
//  txt: string to fix; any falsy value yields ""
//  returns: the fixed string
//A reference is left untouched only if it is
//  - a decimal reference:      &#  followed by one or more digits and ";"
//  - a hexadecimal reference:  &#x followed by one or more hex digits and ";"
//  - an entity reference:      &name; where name is one of the HTML 4.01 entity names (case-sensitive)
//    see http://www.w3.org/TR/html401/sgml/entities.html
function fixAmphersands(txt)
{
	if(!txt) return "";
	
	//anchored alternation, so a name only passes if it equals one of the listed names exactly
	var validEntityNames = new RegExp("^(?:"+
		//markup-significant and internationalization characters
		"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
		"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
		//ISO 8859-1 characters
		"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
		"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
		"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
		"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
		"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|"+
		"euml|igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|"+
		"ugrave|uacute|ucirc|uuml|yacute|thorn|yuml|"+
		//symbols, mathematical symbols, and Greek letters
		"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
		"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
		"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
		"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
		"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
		"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
		"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
		"loz|spades|clubs|hearts|diams"+
		")$");
	
	//Visit every ampersand exactly once. The optional group captures whatever follows the "&" when
	//it has the shape of a character reference; when nothing is captured the "&" is a stray one.
	return txt.replace(/&(#x[0-9a-f]+;|#[0-9]+;|[a-z0-9]+;)?/ig, function(match, reference)
	{
		//stray ampersand, e.g. "a & b", "AT&T", "&#xZZ;" or an unterminated "&amp"
		if(!reference) return "&amp;";
		
		//numeric character reference; already well-formed
		if(reference.charAt(0) === "#") return match;
		
		//named reference; keep it only if the name (without the trailing ";") is a known entity
		if(validEntityNames.test(reference.slice(0, -1))) return match;
		
		//escape the ampersand
		return "&amp;"+reference;
	});
}
//Undoes ampersand escaping: each "&amp;" in txt (in any letter case) becomes a plain "&".
//A falsy txt (empty string, null, undefined, ...) gives back an empty string.
function unFixAmphersands(txt)
{
	return txt ? txt.replace(/&amp;/ig, "&") : "";
}

Initial URL

                                

Initial Description

                                

Initial Title
URL and HTML Encoding

Initial Tags
url, javascript, html

Initial Language
JavaScript