跨站点脚本攻击(XSS)防护 XSS HTMLFilter

来源:互联网 发布:阿里云服务器新手教程 编辑:程序博客网 时间:2024/06/07 05:06

 XSS HTMLFilter 是一个采用 Java 实现的开源类库,用于分析用户提交的输入,消除潜在的跨站点脚本攻击(XSS)、恶意的 HTML 或简单的 HTML 格式错误。

示例代码:

// retrieve input from user...
String input = ...
String clean = new HTMLFilter().filter( input );

 

该项目主页:http://xss-html-filter.sourceforge.net/ 

package net.sf.xsshtmlfilter; 2  3 import java.util.ArrayList; 4 import java.util.Collections; 5 import java.util.HashMap; 6 import java.util.List; 7 import java.util.Map; 8 import java.util.concurrent.ConcurrentHashMap; 9 import java.util.concurrent.ConcurrentMap; 10 import java.util.logging.Logger; 11 import java.util.regex.Matcher; 12 import java.util.regex.Pattern; 13  14 /** 15  * 16  * HTML filtering utility for protecting against XSS (Cross Site Scripting). 17  * 18  * This code is licensed LGPLv3 19  * 20  * This code is a Java port of the original work in PHP by Cal Hendersen. 21  * http://code.iamcal.com/php/lib_filter/ 22  * 23  * The trickiest part of the translation was handling the differences in regex handling 24  * between PHP and Java.  These resources were helpful in the process: 25  * 26  * http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html 27  * http://us2.php.net/manual/en/reference.pcre.pattern.modifiers.php 28  * http://www.regular-expressions.info/modifiers.html 29  * 30  * A note on naming conventions: instance variables are prefixed with a "v"; global 31  * constants are in all caps. 32  * 33  * Sample use: 34  * String input = ... 35  * String clean = new HTMLFilter().filter( input ); 36  * 37  * The class is not thread safe. Create a new instance if in doubt. 38  * 39  * If you find bugs or have suggestions on improvement (especially regarding 40  * performance), please contact us.  
The latest version of this 41  * source, and our contact details, can be found at http://xss-html-filter.sf.net 42  * 43  * @author Joseph O'Connell 44  * @author Cal Hendersen 45  * @author Michael Semb Wever 46  */ 47 public final class HTMLFilter { 48  49     /** regex flag union representing /si modifiers in php **/ 50     private static final int REGEX_FLAGS_SI = Pattern.CASE_INSENSITIVE | Pattern.DOTALL; 51     private static final Pattern P_COMMENTS = Pattern.compile("<!--(.*?)-->", Pattern.DOTALL); 52     private static final Pattern P_COMMENT = Pattern.compile("^!--(.*)--{1}quot;, REGEX_FLAGS_SI); 53     private static final Pattern P_TAGS = Pattern.compile("<(.*?)>", Pattern.DOTALL); 54     private static final Pattern P_END_TAG = Pattern.compile("^/([a-z0-9]+)", REGEX_FLAGS_SI); 55     private static final Pattern P_START_TAG = Pattern.compile("^([a-z0-9]+)(.*?)(/?){1}quot;, REGEX_FLAGS_SI); 56     private static final Pattern P_QUOTED_ATTRIBUTES = Pattern.compile("([a-z0-9]+)=([\"'])(.*?)\\2", REGEX_FLAGS_SI); 57     private static final Pattern P_UNQUOTED_ATTRIBUTES = Pattern.compile("([a-z0-9]+)(=)([^\"\\s']+)", REGEX_FLAGS_SI); 58     private static final Pattern P_PROTOCOL = Pattern.compile("^([^:]+):", REGEX_FLAGS_SI); 59     private static final Pattern P_ENTITY = Pattern.compile("&#(\\d+);?"); 60     private static final Pattern P_ENTITY_UNICODE = Pattern.compile("&#x([0-9a-f]+);?"); 61     private static final Pattern P_ENCODE = Pattern.compile("%([0-9a-f]{2});?"); 62     private static final Pattern P_VALID_ENTITIES = Pattern.compile("&([^&;]*)(?=(;|&|$))"); 63     private static final Pattern P_VALID_QUOTES = Pattern.compile("(>|^)([^<]+?)(<|$)", Pattern.DOTALL); 64     private static final Pattern P_END_ARROW = Pattern.compile("^>"); 65     private static final Pattern P_BODY_TO_END = Pattern.compile("<([^>]*?)(?=<|$)"); 66     private static final Pattern P_XML_CONTENT = Pattern.compile("(^|>)([^<]*?)(?=>)"); 67     private static final 
Pattern P_STRAY_LEFT_ARROW = Pattern.compile("<([^>]*?)(?=<|$)"); 68     private static final Pattern P_STRAY_RIGHT_ARROW = Pattern.compile("(^|>)([^<]*?)(?=>)"); 69     private static final Pattern P_AMP = Pattern.compile("&"); 70     private static final Pattern P_QUOTE = Pattern.compile("\""); 71     private static final Pattern P_LEFT_ARROW = Pattern.compile("<"); 72     private static final Pattern P_RIGHT_ARROW = Pattern.compile(">"); 73     private static final Pattern P_BOTH_ARROWS = Pattern.compile("<>"); 74  75     // @xxx could grow large... maybe use sesat's ReferenceMap 76     private static final ConcurrentMap<String,Pattern> P_REMOVE_PAIR_BLANKS = new ConcurrentHashMap<String, Pattern>(); 77     private static final ConcurrentMap<String,Pattern> P_REMOVE_SELF_BLANKS = new ConcurrentHashMap<String, Pattern>(); 78  79     /** set of allowed html elements, along with allowed attributes for each element **/ 80     private final Map<String, List<String>> vAllowed; 81     /** counts of open tags for each (allowable) html element **/ 82     private final Map<String, Integer> vTagCounts = new HashMap<String, Integer>(); 83  84     /** html elements which must always be self-closing (e.g. "<img />") **/ 85     private final String[] vSelfClosingTags; 86     /** html elements which must always have separate opening and closing tags (e.g. "<b></b>") **/ 87     private final String[] vNeedClosingTags; 88     /** set of disallowed html elements **/ 89     private final String[] vDisallowed; 90     /** attributes which should be checked for valid protocols **/ 91     private final String[] vProtocolAtts; 92     /** allowed protocols **/ 93     private final String[] vAllowedProtocols; 94     /** tags which should be removed if they contain no content (e.g. 
"<b></b>" or "<b />") **/ 95     private final String[] vRemoveBlanks; 96     /** entities allowed within html markup **/ 97     private final String[] vAllowedEntities; 98     /** flag determining whether comments are allowed in input String. */ 99     private final boolean stripComment; 100     private boolean vDebug = false; 101     /** 102      * flag determining whether to try to make tags when presented with "unbalanced" 103      * angle brackets (e.g. "<b text </b>" becomes "<b> text </b>").  If set to false, 104      * unbalanced angle brackets will be html escaped. 105      */ 106     private final boolean alwaysMakeTags; 107  108     /** Default constructor. 109      * 110      */ 111     public HTMLFilter() { 112         vAllowed = new HashMap<String, List<String>>(); 113  114         final ArrayList<String> a_atts = new ArrayList<String>(); 115         a_atts.add("href"); 116         a_atts.add("target"); 117         vAllowed.put("a", a_atts); 118  119         final ArrayList<String> img_atts = new ArrayList<String>(); 120         img_atts.add("src"); 121         img_atts.add("width"); 122         img_atts.add("height"); 123         img_atts.add("alt"); 124         vAllowed.put("img", img_atts); 125  126         final ArrayList<String> no_atts = new ArrayList<String>(); 127         vAllowed.put("b", no_atts); 128         vAllowed.put("strong", no_atts); 129         vAllowed.put("i", no_atts); 130         vAllowed.put("em", no_atts); 131  132         vSelfClosingTags = new String[]{"img"}; 133         vNeedClosingTags = new String[]{"a", "b", "strong", "i", "em"}; 134         vDisallowed = new String[]{}; 135         vAllowedProtocols = new String[]{"http", "mailto"}; // no ftp. 
136         vProtocolAtts = new String[]{"src", "href"}; 137         vRemoveBlanks = new String[]{"a", "b", "strong", "i", "em"}; 138         vAllowedEntities = new String[]{"amp", "gt", "lt", "quot"}; 139         stripComment = true; 140         alwaysMakeTags = true; 141     } 142  143     /** Set debug flag to true. Otherwise use default settings. See the default constructor. 144      * 145      * @param debug turn debug on with a true argument 146      */ 147     public HTMLFilter(final boolean debug) { 148         this(); 149         vDebug = debug; 150  151     } 152  153     /** Map-parameter configurable constructor. 154      * 155      * @param configuration map containing configuration. keys match field names. 156      */ 157     public HTMLFilter(final Map<String,Object> configuration) { 158  159         assert configuration.containsKey("vAllowed") : "configuration requires vAllowed"; 160         assert configuration.containsKey("vSelfClosingTags") : "configuration requires vSelfClosingTags"; 161         assert configuration.containsKey("vNeedClosingTags") : "configuration requires vNeedClosingTags"; 162         assert configuration.containsKey("vDisallowed") : "configuration requires vDisallowed"; 163         assert configuration.containsKey("vAllowedProtocols") : "configuration requires vAllowedProtocols"; 164         assert configuration.containsKey("vProtocolAtts") : "configuration requires vProtocolAtts"; 165         assert configuration.containsKey("vRemoveBlanks") : "configuration requires vRemoveBlanks"; 166         assert configuration.containsKey("vAllowedEntities") : "configuration requires vAllowedEntities"; 167         assert configuration.containsKey("stripComment") : "configuration requires stripComment"; 168         assert configuration.containsKey("alwaysMakeTags") : "configuration requires alwaysMakeTags"; 169  170         vAllowed = Collections.unmodifiableMap((HashMap<String, List<String>>) configuration.get("vAllowed")); 171         
vSelfClosingTags = (String[]) configuration.get("vSelfClosingTags"); 172         vNeedClosingTags = (String[]) configuration.get("vNeedClosingTags"); 173         vDisallowed = (String[]) configuration.get("vDisallowed"); 174         vAllowedProtocols = (String[]) configuration.get("vAllowedProtocols"); 175         vProtocolAtts = (String[]) configuration.get("vProtocolAtts"); 176         vRemoveBlanks = (String[]) configuration.get("vRemoveBlanks"); 177         vAllowedEntities = (String[]) configuration.get("vAllowedEntities"); 178         stripComment = (Boolean) configuration.get("stripComment"); 179         alwaysMakeTags = (Boolean) configuration.get("alwaysMakeTags"); 180     } 181  182     private void reset() { 183         vTagCounts.clear(); 184     } 185  186     private void debug(final String msg) { 187         if (vDebug) { 188             Logger.getAnonymousLogger().info(msg); 189         } 190     } 191  192     //--------------------------------------------------------------- 193     // my versions of some PHP library functions 194     public static String chr(final int decimal) { 195         return String.valueOf((char) decimal); 196     } 197  198     public static String htmlSpecialChars(final String s) { 199         String result = s; 200         result = regexReplace(P_AMP, "&", result); 201         result = regexReplace(P_QUOTE, """, result); 202         result = regexReplace(P_LEFT_ARROW, "<", result); 203         result = regexReplace(P_RIGHT_ARROW, ">", result); 204         return result; 205     } 206  207     //--------------------------------------------------------------- 208     /** 209      * given a user submitted input String, filter out any invalid or restricted 210      * html. 211      * 212      * @param input text (i.e. 
submitted by a user) than may contain html 213      * @return "clean" version of input, with only valid, whitelisted html elements allowed 214      */ 215     public String filter(final String input) { 216         reset(); 217         String s = input; 218  219         debug("************************************************"); 220         debug("              INPUT: " + input); 221  222         s = escapeComments(s); 223         debug("     escapeComments: " + s); 224  225         s = balanceHTML(s); 226         debug("        balanceHTML: " + s); 227  228         s = checkTags(s); 229         debug("          checkTags: " + s); 230  231         s = processRemoveBlanks(s); 232         debug("processRemoveBlanks: " + s); 233  234         s = validateEntities(s); 235         debug("    validateEntites: " + s); 236  237         debug("************************************************\n\n"); 238         return s; 239     } 240  241     public boolean isAlwaysMakeTags(){ 242         return alwaysMakeTags; 243     } 244  245     public boolean isStripComments(){ 246         return stripComment; 247     } 248  249     private String escapeComments(final String s) { 250         final Matcher m = P_COMMENTS.matcher(s); 251         final StringBuffer buf = new StringBuffer(); 252         if (m.find()) { 253             final String match = m.group(1); //(.*?) 
254             m.appendReplacement(buf, Matcher.quoteReplacement("<!--" + htmlSpecialChars(match) + "-->")); 255         } 256         m.appendTail(buf); 257  258         return buf.toString(); 259     } 260  261     private String balanceHTML(String s) { 262         if (alwaysMakeTags) { 263             // 264             // try and form html 265             // 266             s = regexReplace(P_END_ARROW, "", s); 267             s = regexReplace(P_BODY_TO_END, "<$1>", s); 268             s = regexReplace(P_XML_CONTENT, "$1<$2", s); 269  270         } else { 271             // 272             // escape stray brackets 273             // 274             s = regexReplace(P_STRAY_LEFT_ARROW, "<$1", s); 275             s = regexReplace(P_STRAY_RIGHT_ARROW, "$1$2><", s); 276  277             // 278             // the last regexp causes '<>' entities to appear 279             // (we need to do a lookahead assertion so that the last bracket can 280             // be used in the next pass of the regexp) 281             // 282             s = regexReplace(P_BOTH_ARROWS, "", s); 283         } 284  285         return s; 286     } 287  288     private String checkTags(String s) { 289         Matcher m = P_TAGS.matcher(s); 290  291         final StringBuffer buf = new StringBuffer(); 292         while (m.find()) { 293             String replaceStr = m.group(1); 294             replaceStr = processTag(replaceStr); 295             m.appendReplacement(buf, Matcher.quoteReplacement(replaceStr)); 296         } 297         m.appendTail(buf); 298  299         s = buf.toString(); 300  301         // these get tallied in processTag 302         // (remember to reset before subsequent calls to filter method) 303         for (String key : vTagCounts.keySet()) { 304             for (int ii = 0; ii < vTagCounts.get(key); ii++) { 305                 s += "</" + key + ">"; 306             } 307         } 308  309         return s; 310     } 311  312     private String 
processRemoveBlanks(final String s) { 313         String result = s; 314         for (String tag : vRemoveBlanks) { 315             if(!P_REMOVE_PAIR_BLANKS.containsKey(tag)){ 316                 P_REMOVE_PAIR_BLANKS.putIfAbsent(tag, Pattern.compile("<" + tag + "(\\s[^>]*)?></" + tag + ">")); 317             } 318             result = regexReplace(P_REMOVE_PAIR_BLANKS.get(tag), "", result); 319             if(!P_REMOVE_SELF_BLANKS.containsKey(tag)){ 320                 P_REMOVE_SELF_BLANKS.putIfAbsent(tag, Pattern.compile("<" + tag + "(\\s[^>]*)?/>")); 321             } 322             result = regexReplace(P_REMOVE_SELF_BLANKS.get(tag), "", result); 323         } 324  325         return result; 326     } 327  328     private static String regexReplace(final Pattern regex_pattern, final String replacement, final String s) { 329         Matcher m = regex_pattern.matcher(s); 330         return m.replaceAll(replacement); 331     } 332  333     private String processTag(final String s) { 334         // ending tags 335         Matcher m = P_END_TAG.matcher(s); 336         if (m.find()) { 337             final String name = m.group(1).toLowerCase(); 338             if (allowed(name)) { 339                 if (!inArray(name, vSelfClosingTags)) { 340                     if (vTagCounts.containsKey(name)) { 341                         vTagCounts.put(name, vTagCounts.get(name) - 1); 342                         return "</" + name + ">"; 343                     } 344                 } 345             } 346         } 347  348         // starting tags 349         m = P_START_TAG.matcher(s); 350         if (m.find()) { 351             final String name = m.group(1).toLowerCase(); 352             final String body = m.group(2); 353             String ending = m.group(3); 354  355             //debug( "in a starting tag, name='" + name + "'; body='" + body + "'; ending='" + ending + "'" ); 356             if (allowed(name)) { 357                 String params = ""; 358  359          
       final Matcher m2 = P_QUOTED_ATTRIBUTES.matcher(body); 360                 final Matcher m3 = P_UNQUOTED_ATTRIBUTES.matcher(body); 361                 final List<String> paramNames = new ArrayList<String>(); 362                 final List<String> paramValues = new ArrayList<String>(); 363                 while (m2.find()) { 364                     paramNames.add(m2.group(1)); //([a-z0-9]+) 365                     paramValues.add(m2.group(3)); //(.*?) 366                 } 367                 while (m3.find()) { 368                     paramNames.add(m3.group(1)); //([a-z0-9]+) 369                     paramValues.add(m3.group(3)); //([^\"\\s']+) 370                 } 371  372                 String paramName, paramValue; 373                 for (int ii = 0; ii < paramNames.size(); ii++) { 374                     paramName = paramNames.get(ii).toLowerCase(); 375                     paramValue = paramValues.get(ii); 376  377 //          debug( "paramName='" + paramName + "'" ); 378 //          debug( "paramValue='" + paramValue + "'" ); 379 //          debug( "allowed? 
" + vAllowed.get( name ).contains( paramName ) ); 380  381                     if (allowedAttribute(name, paramName)) { 382                         if (inArray(paramName, vProtocolAtts)) { 383                             paramValue = processParamProtocol(paramValue); 384                         } 385                         params += " " + paramName + "=\"" + paramValue + "\""; 386                     } 387                 } 388  389                 if (inArray(name, vSelfClosingTags)) { 390                     ending = " /"; 391                 } 392  393                 if (inArray(name, vNeedClosingTags)) { 394                     ending = ""; 395                 } 396  397                 if (ending == null || ending.length() < 1) { 398                     if (vTagCounts.containsKey(name)) { 399                         vTagCounts.put(name, vTagCounts.get(name) + 1); 400                     } else { 401                         vTagCounts.put(name, 1); 402                     } 403                 } else { 404                     ending = " /"; 405                 } 406                 return "<" + name + params + ending + ">"; 407             } else { 408                 return ""; 409             } 410         } 411  412         // comments 413         m = P_COMMENT.matcher(s); 414         if (!stripComment && m.find()) { 415             return  "<" + m.group() + ">"; 416         } 417  418         return ""; 419     } 420  421     private String processParamProtocol(String s) { 422         s = decodeEntities(s); 423         final Matcher m = P_PROTOCOL.matcher(s); 424         if (m.find()) { 425             final String protocol = m.group(1); 426             if (!inArray(protocol, vAllowedProtocols)) { 427                 // bad protocol, turn into local anchor link instead 428                 s = "#" + s.substring(protocol.length() + 1, s.length()); 429                 if (s.startsWith("#//")) { 430                     s = "#" + s.substring(3, s.length()); 
431                 } 432             } 433         } 434  435         return s; 436     } 437  438     private String decodeEntities(String s) { 439         StringBuffer buf = new StringBuffer(); 440  441         Matcher m = P_ENTITY.matcher(s); 442         while (m.find()) { 443             final String match = m.group(1); 444             final int decimal = Integer.decode(match).intValue(); 445             m.appendReplacement(buf, Matcher.quoteReplacement(chr(decimal))); 446         } 447         m.appendTail(buf); 448         s = buf.toString(); 449  450         buf = new StringBuffer(); 451         m = P_ENTITY_UNICODE.matcher(s); 452         while (m.find()) { 453             final String match = m.group(1); 454             final int decimal = Integer.valueOf(match, 16).intValue(); 455             m.appendReplacement(buf, Matcher.quoteReplacement(chr(decimal))); 456         } 457         m.appendTail(buf); 458         s = buf.toString(); 459  460         buf = new StringBuffer(); 461         m = P_ENCODE.matcher(s); 462         while (m.find()) { 463             final String match = m.group(1); 464             final int decimal = Integer.valueOf(match, 16).intValue(); 465             m.appendReplacement(buf, Matcher.quoteReplacement(chr(decimal))); 466         } 467         m.appendTail(buf); 468         s = buf.toString(); 469  470         s = validateEntities(s); 471         return s; 472     } 473  474     private String validateEntities(String s) { 475         StringBuffer buf = new StringBuffer(); 476  477         // validate entities throughout the string 478         Matcher m = P_VALID_ENTITIES.matcher(s); 479         while (m.find()) { 480             final String one = m.group(1); //([^&;]*) 481             final String two = m.group(2); //(?=(;|&|$)) 482             m.appendReplacement(buf, Matcher.quoteReplacement(checkEntity(one, two))); 483         } 484         m.appendTail(buf); 485         s = buf.toString(); 486  487         // validate 
quotes outside of tags 488         buf = new StringBuffer(); 489         m = P_VALID_QUOTES.matcher(s); 490         while (m.find()) { 491             final String one = m.group(1); //(>|^) 492             final String two = m.group(2); //([^<]+?) 493             final String three = m.group(3); //(<|$) 494             m.appendReplacement(buf, Matcher.quoteReplacement(one + regexReplace(P_QUOTE, """, two) + three)); 495         } 496         m.appendTail(buf); 497         s = buf.toString(); 498  499         return s; 500     } 501  502     private String checkEntity(final String preamble, final String term) { 503  504         return ";".equals(term) && isValidEntity(preamble) 505                 ? '&' + preamble 506                 : "&" + preamble; 507     } 508  509     private boolean isValidEntity(final String entity) { 510         return inArray(entity, vAllowedEntities); 511     } 512  513     private static boolean inArray(final String s, final String[] array) { 514         for (String item : array) { 515             if (item != null && item.equals(s)) { 516                 return true; 517             } 518         } 519         return false; 520     } 521  522     private boolean allowed(final String name) { 523         return (vAllowed.isEmpty() || vAllowed.containsKey(name)) && !inArray(name, vDisallowed); 524     } 525  526     private boolean allowedAttribute(final String name, final String paramName) { 527         return allowed(name) && (vAllowed.isEmpty() || vAllowed.get(name).contains(paramName)); 528     } 529 } 


原创粉丝点击