Posted By

coder_ on 02/14/11


Tagged

java html chars special


Versions (?)

HTML Special characters escape


 / Published in: Java
 

  1. /*
  2.  * Static String formatting and query routines.
  3.  * Copyright (C) 2001-2005 Stephen Ostermiller
  4.  * http://ostermiller.org/contact.pl?regarding=Java+Utilities
  5.  *
  6.  * This program is free software; you can redistribute it and/or modify
  7.  * it under the terms of the GNU General Public License as published by
  8.  * the Free Software Foundation; either version 2 of the License, or
  9.  * (at your option) any later version.
  10.  *
  11.  * This program is distributed in the hope that it will be useful,
  12.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14.  * GNU General Public License for more details.
  15.  *
  16.  * See COPYING.TXT for details.
  17.  */
  18.  
  19. /**
  20.  * Utilities for String formatting, manipulation, and queries.
  21.  * More information about this class is available from <a target="_top" href=
  22.  * "http://ostermiller.org/utils/StringHelper.html">ostermiller.org</a>.
  23.  *
  24.  * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
  25.  * @since ostermillerutils 1.00.00
  26.  */
  public class StringHelper {

    /**
     * Replaces characters that may be confused by an HTML
     * parser with their equivalent character entity references.
     *
     * Any data that will appear as text on a web page should
     * be escaped. This is especially important for data
     * that comes from untrusted sources such as Internet users.
     * A common mistake in CGI programming is to ask a user for
     * data and then put that data on a web page. For example:<pre>
     * Server: What is your name?
     * User: &lt;b&gt;Joe&lt;/b&gt;
     * Server: Hello <b>Joe</b>, Welcome</pre>
     * If the name is put on the page without checking that it doesn't
     * contain HTML code or without sanitizing that HTML code, the user
     * could reformat the page, insert scripts, and control the
     * content on your web server.
     *
     * This method will replace HTML characters such as &gt; with their
     * HTML entity reference (&amp;gt;) so that the html parser will
     * be sure to interpret them as plain text rather than HTML or script.
     * The characters replaced are <code>"</code>, <code>'</code>,
     * <code>&amp;</code>, <code>&lt;</code> and <code>&gt;</code>.
     * Characters below 32 are removed from the result, except for
     * carriage return, line feed, tab and form feed, which are kept as is.
     *
     * This method should be used for both data to be displayed in text
     * in the html document, and data put in form elements. For example:<br>
     * <code>&lt;html&gt;&lt;body&gt;<i>This is not a &amp;lt;tag&amp;gt;
     * in HTML</i>&lt;/body&gt;&lt;/html&gt;</code><br>
     * and<br>
     * <code>&lt;form&gt;&lt;input type="hidden" name="date" value="<i>This data could
     * be &amp;quot;malicious&amp;quot;</i>"&gt;&lt;/form&gt;</code><br>
     * In the second example, the form data would be properly resubmitted
     * to your cgi script in the URLEncoded format:<br>
     * <code><i>This data could be %22malicious%22</i></code>
     *
     * @param s String to be escaped
     * @return escaped String; the very same instance as <code>s</code>
     *         when nothing had to be replaced or removed
     * @throws NullPointerException if s is null.
     *
     * @since ostermillerutils 1.00.00
     */
    public static String escapeHTML(String s){
      int length = s.length();
      int newLength = length;
      boolean someCharacterEscaped = false;
      // First pass: find out whether anything needs to change and
      // compute the exact size of the result so the builder never grows.
      for (int i = 0; i < length; i++){
        String replacement = replacementFor(s.charAt(i));
        if (replacement != null){
          // One source character is swapped for replacement.length()
          // characters (zero for a removed control character).
          newLength += replacement.length() - 1;
          someCharacterEscaped = true;
        }
      }
      if (!someCharacterEscaped){
        // nothing to escape in the string
        return s;
      }
      // Second pass: build the result. The builder is local to this call,
      // so the unsynchronized StringBuilder is sufficient.
      StringBuilder sb = new StringBuilder(newLength);
      for (int i = 0; i < length; i++){
        char c = s.charAt(i);
        String replacement = replacementFor(c);
        if (replacement == null){
          sb.append(c);
        } else {
          sb.append(replacement);
        }
      }
      return sb.toString();
    }

    /**
     * Decides what a single character becomes in escaped HTML output.
     *
     * @param c character to classify
     * @return the entity reference for an HTML special character, the empty
     *         string for a control character that must be removed, or
     *         <code>null</code> when the character is copied unchanged
     */
    private static String replacementFor(char c){
      switch (c){
        case '\"':
          return "&quot;";
        case '\'':
          return "&#39;";
        case '&':
          return "&amp;";
        case '<':
          return "&lt;";
        case '>':
          return "&gt;";
        case '\r':
        case '\n':
        case '\t':
        case '\f':
          // Whitespace control characters are kept as they are.
          return null;
        default:
          // Any other character below 32 is dropped from the output.
          return (c < 32) ? "" : null;
      }
    }
  }

Report this snippet  

You need to log in to post a comment.