// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
028 029 package org.owasp.html; 030 031 import java.util.List; 032 import java.util.Map; 033 import java.util.Set; 034 import java.util.regex.Pattern; 035 036 import javax.annotation.Nullable; 037 import javax.annotation.concurrent.NotThreadSafe; 038 039 import com.google.common.base.Predicate; 040 import com.google.common.collect.ImmutableList; 041 import com.google.common.collect.ImmutableMap; 042 import com.google.common.collect.ImmutableSet; 043 import com.google.common.collect.Maps; 044 import com.google.common.collect.Sets; 045 046 047 /** 048 * Conveniences for configuring policies for the {@link HtmlSanitizer}. 049 * 050 * <h3>Usage</h3> 051 * <p> 052 * To create a policy, first construct an instance of this class; then call 053 * <code>allow…</code> methods to turn on tags, attributes, and other 054 * processing modes; and finally call <code>build(renderer)</code> or 055 * <code>toFactory()</code>. 056 * </p> 057 * <pre class="prettyprint lang-java"> 058 * // Define the policy. 059 * Function<HtmlStreamEventReceiver, HtmlSanitizer.Policy> policy 060 * = new HtmlPolicyBuilder() 061 * .allowElements("a", "p") 062 * .allowAttributes("href").onElements("a") 063 * .toFactory(); 064 * 065 * // Sanitize your output. 066 * HtmlSanitizer.sanitize(myHtml, policy.apply(myHtmlStreamRenderer)); 067 * </pre> 068 * 069 * <h3>Embedded Content</h3> 070 * <p> 071 * Embedded URLs are filtered by 072 * {@link HtmlPolicyBuilder#allowUrlProtocols protocol}. 073 * There is a {@link HtmlPolicyBuilder#allowStandardUrlProtocols canned policy} 074 * so you can easily white-list widely used policies that don't violate the 075 * current pages origin. See "Customization" below for ways to do further 076 * filtering. If you allow links it might be worthwhile to 077 * {@link HtmlPolicyBuilder#requireRelNofollowOnLinks() require} 078 * {@code rel=nofollow}. 079 * </p> 080 * <p> 081 * This class simply throws out all embedded JS. 
082 * Use a custom element or attribute policy to allow through 083 * signed or otherwise known-safe code. 084 * Check out the Caja project if you need a way to contain third-party JS. 085 * </p> 086 * <p> 087 * This class does not attempt to faithfully parse and sanitize CSS. 088 * It does provide {@link HtmlPolicyBuilder#allowStyling() one} styling option 089 * that allows through a few CSS properties that allow textual styling, but that 090 * disallow image loading, history stealing, layout breaking, code execution, 091 * etc. 092 * </p> 093 * 094 * <h3>Customization</h3> 095 * <p> 096 * You can easily do custom processing on tags and attributes by supplying your 097 * own {@link ElementPolicy element policy} or 098 * {@link AttributePolicy attribute policy} when calling 099 * <code>allow…</code>. 100 * E.g. to convert headers into {@code <div>}s, you could use an element policy 101 * </p> 102 * <pre class="prettyprint lang-java"> 103 * new HtmlPolicyBuilder() 104 * .allowElement( 105 * new ElementPolicy() { 106 * public String apply(String elementName, List<String> attributes) { 107 * attributes.add("class"); 108 * attributes.add("header-" + elementName); 109 * return "div"; 110 * } 111 * }, 112 * "h1", "h2", "h3", "h4", "h5", "h6") 113 * .build(outputChannel) 114 * </pre> 115 * 116 * <h3>Rules of Thumb</h3> 117 * <p> 118 * Throughout this class, several rules hold: 119 * <ul> 120 * <li>Everything is denied by default. There are 121 * <code>disallow…</code> methods, but those reverse 122 * allows instead of rolling back overly permissive defaults. 123 * <li>The order of allows and disallows does not matter. 124 * Disallows trump allows whether they occur before or after them. 125 * The only method that needs to be called in a particular place is 126 * {@link HtmlPolicyBuilder#build}. 127 * Allows or disallows after {@code build} is called have no 128 * effect on the already built policy. 
129 * <li>Element and attribute policies are applied in the following order: 130 * element specific attribute policy, global attribute policy, element 131 * policy. 132 * Element policies come last so they can observe all the post-processed 133 * attributes, and so they can add attributes that are exempt from 134 * attribute policies. 135 * Element specific policies go first, so they can normalize content to 136 * a form that might be acceptable to a more simplistic global policy. 137 * </ul> 138 * 139 * <h3>Thread safety and efficiency</h3> 140 * <p> 141 * This class is not thread-safe. The resulting policy will not violate its 142 * security guarantees as a result of race conditions, but is not thread safe 143 * because it maintains state to track whether text inside disallowed elements 144 * should be suppressed. 145 * <p> 146 * The resulting policy can be reused, but if you use the 147 * {@link HtmlPolicyBuilder#toFactory()} method instead of {@link #build}, then 148 * binding policies to output channels is cheap so there's no need. 149 * </p> 150 * 151 * @author Mike Samuel <mikesamuel@gmail.com> 152 */ 153 @TCB 154 @NotThreadSafe 155 public class HtmlPolicyBuilder { 156 /** 157 * The default set of elements that are removed if they have no attributes. 158 * Since {@code <img>} is in this set, by default, a policy will remove 159 * {@code <img src=javascript:alert(1337)>} because its URL is not allowed 160 * and it has no other attributes that would warrant it appearing in the 161 * output. 
162 */ 163 public static final ImmutableSet<String> DEFAULT_SKIP_IF_EMPTY 164 = ImmutableSet.of("a", "font", "img", "input", "span"); 165 166 private final Map<String, ElementPolicy> elPolicies = Maps.newLinkedHashMap(); 167 private final Map<String, Map<String, AttributePolicy>> attrPolicies 168 = Maps.newLinkedHashMap(); 169 private final Map<String, AttributePolicy> globalAttrPolicies 170 = Maps.newLinkedHashMap(); 171 private final Set<String> allowedProtocols = Sets.newLinkedHashSet(); 172 private final Set<String> skipIfEmpty = Sets.newLinkedHashSet( 173 DEFAULT_SKIP_IF_EMPTY); 174 private final Map<String, Boolean> textContainers = Maps.newLinkedHashMap(); 175 private boolean requireRelNofollowOnLinks; 176 177 /** 178 * Allows the named elements. 179 */ 180 public HtmlPolicyBuilder allowElements(String... elementNames) { 181 return allowElements(ElementPolicy.IDENTITY_ELEMENT_POLICY, elementNames); 182 } 183 184 /** 185 * Disallows the named elements. Elements are disallowed by default, so 186 * there is no need to disallow elements, unless you are making an exception 187 * based on an earlier allow. 188 */ 189 public HtmlPolicyBuilder disallowElements(String... elementNames) { 190 return allowElements(ElementPolicy.REJECT_ALL_ELEMENT_POLICY, elementNames); 191 } 192 193 /** 194 * Allow the given elements with the given policy. 195 * 196 * @param policy May remove or add attributes, change the element name, or 197 * deny the element. 198 */ 199 public HtmlPolicyBuilder allowElements( 200 ElementPolicy policy, String... elementNames) { 201 invalidateCompiledState(); 202 for (String elementName : elementNames) { 203 elementName = HtmlLexer.canonicalName(elementName); 204 ElementPolicy newPolicy = ElementPolicy.Util.join( 205 elPolicies.get(elementName), policy); 206 // Don't remove if newPolicy is the always reject policy since we want 207 // that to infect later allowElement calls for this particular element 208 // name. 
rejects should have higher priority than allows. 209 elPolicies.put(elementName, newPolicy); 210 if (!textContainers.containsKey(elementName) 211 && TagBalancingHtmlStreamEventReceiver 212 .allowsPlainTextualContent(elementName)) { 213 textContainers.put(elementName, true); 214 } 215 } 216 return this; 217 } 218 219 /** 220 * A canned policy that allows a number of common formatting elements. 221 */ 222 public HtmlPolicyBuilder allowCommonInlineFormattingElements() { 223 return allowElements( 224 "b", "i", "font", "s", "u", "o", "sup", "sub", "ins", "del", "strong", 225 "strike", "tt", "code", "big", "small", "br", "span"); 226 } 227 228 /** 229 * A canned policy that allows a number of common block elements. 230 */ 231 public HtmlPolicyBuilder allowCommonBlockElements() { 232 return allowElements( 233 "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "li", 234 "blockquote"); 235 } 236 237 /** 238 * Allows text content in the named elements. 239 * By default, text content is allowed in any 240 * {@link #allowElements allowed elements} that can contain character data per 241 * the HTML5 spec, but text content is not allowed by default in elements that 242 * contain content of other kinds (like JavaScript in {@code <script>} 243 * elements. 244 * <p> 245 * To write a policy that whitelists {@code <script>} or {@code <style>} 246 * elements, first {@code allowTextIn("script")}. 247 */ 248 public HtmlPolicyBuilder allowTextIn(String... elementNames) { 249 invalidateCompiledState(); 250 for (String elementName : elementNames) { 251 elementName = HtmlLexer.canonicalName(elementName); 252 textContainers.put(elementName, true); 253 } 254 return this; 255 } 256 257 public HtmlPolicyBuilder disallowTextIn(String... 
elementNames) { 258 invalidateCompiledState(); 259 for (String elementName : elementNames) { 260 elementName = HtmlLexer.canonicalName(elementName); 261 textContainers.put(elementName, false); 262 } 263 return this; 264 } 265 266 /** 267 * Assuming the given elements are allowed, allows them to appear without 268 * attributes. 269 * 270 * @see #DEFAULT_SKIP_IF_EMPTY 271 * @see #disallowWithoutAttributes 272 */ 273 public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) { 274 invalidateCompiledState(); 275 for (String elementName : elementNames) { 276 elementName = HtmlLexer.canonicalName(elementName); 277 skipIfEmpty.remove(elementName); 278 } 279 return this; 280 } 281 282 /** 283 * Disallows the given elements from appearing without attributes. 284 * 285 * @see #DEFAULT_SKIP_IF_EMPTY 286 * @see #allowWithoutAttributes 287 */ 288 public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) { 289 invalidateCompiledState(); 290 for (String elementName : elementNames) { 291 elementName = HtmlLexer.canonicalName(elementName); 292 skipIfEmpty.add(elementName); 293 } 294 return this; 295 } 296 297 /** 298 * Returns an object that lets you associate policies with the given 299 * attributes, and allow them globally or on specific elements. 300 */ 301 public AttributeBuilder allowAttributes(String... attributeNames) { 302 ImmutableList.Builder<String> b = ImmutableList.builder(); 303 for (String attributeName : attributeNames) { 304 b.add(HtmlLexer.canonicalName(attributeName)); 305 } 306 return new AttributeBuilder(b.build()); 307 } 308 309 /** 310 * Reverse an earlier attribute {@link #allowAttributes allow}. 311 * <p> 312 * For this to have an effect you must call at least one of 313 * {@link AttributeBuilder#globally} and {@link AttributeBuilder#onElements}. 314 * <p> 315 * Attributes are disallowed by default, so there is no need to call this 316 * with a laundry list of attribute/element pairs. 
317 */ 318 public AttributeBuilder disallowAttributes(String... attributeNames) { 319 return this.allowAttributes(attributeNames) 320 .matching(AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY); 321 } 322 323 324 private HtmlPolicyBuilder allowAttributesGlobally( 325 AttributePolicy policy, List<String> attributeNames) { 326 invalidateCompiledState(); 327 for (String attributeName : attributeNames) { 328 // We reinterpret the identity policy later via policy joining since its 329 // the default passed from the policy-less method, but we don't do 330 // anything here since we don't know until build() is called whether the 331 // policy author wants to allow certain URL protocols or wants to deal 332 // with styles. 333 AttributePolicy oldPolicy = globalAttrPolicies.get(attributeName); 334 globalAttrPolicies.put( 335 attributeName, AttributePolicy.Util.join(oldPolicy, policy)); 336 } 337 return this; 338 } 339 340 private HtmlPolicyBuilder allowAttributesOnElements( 341 AttributePolicy policy, List<String> attributeNames, 342 List<String> elementNames) { 343 invalidateCompiledState(); 344 for (String elementName : elementNames) { 345 Map<String, AttributePolicy> policies = attrPolicies.get(elementName); 346 if (policies == null) { 347 policies = Maps.newLinkedHashMap(); 348 attrPolicies.put(elementName, policies); 349 } 350 for (String attributeName : attributeNames) { 351 AttributePolicy oldPolicy = policies.get(attributeName); 352 policies.put( 353 attributeName, 354 AttributePolicy.Util.join(oldPolicy, policy)); 355 } 356 } 357 return this; 358 } 359 360 /** 361 * Adds <a href="http://en.wikipedia.org/wiki/Nofollow"><code>rel=nofollow</code></a> 362 * to links. 363 */ 364 public HtmlPolicyBuilder requireRelNofollowOnLinks() { 365 invalidateCompiledState(); 366 this.requireRelNofollowOnLinks = true; 367 return this; 368 } 369 370 /** 371 * Adds to the set of protocols that are allowed in URL attributes. 
372 * For each URL attribute that is allowed, we further constrain it by 373 * only allowing the value through if it specifies no protocol, or if it 374 * specifies one in the allowedProtocols white-list. 375 * This is done regardless of whether any protocols have been allowed, so 376 * allowing the attribute "href" globally with the identity policy but 377 * not white-listing any protocols, effectively disallows the "href" 378 * attribute globally. 379 * <p> 380 * Do not allow any <code>*script</code> such as <code>javascript</code> 381 * protocols if you might use this policy with untrusted code. 382 */ 383 public HtmlPolicyBuilder allowUrlProtocols(String... protocols) { 384 invalidateCompiledState(); 385 // If there is at least one allowed protocol, then allow URLs and 386 // add a filter that checks href and src values. 387 388 // Do not allow href and srcs through otherwise, and only allow on images 389 // and links. 390 for (String protocol : protocols) { 391 protocol = Strings.toLowerCase(protocol); 392 allowedProtocols.add(protocol); 393 } 394 return this; 395 } 396 397 /** 398 * Reverses a decision made by {@link #allowUrlProtocols}. 399 */ 400 public HtmlPolicyBuilder disallowUrlProtocols(String... protocols) { 401 invalidateCompiledState(); 402 for (String protocol : protocols) { 403 protocol = Strings.toLowerCase(protocol); 404 allowedProtocols.remove(protocol); 405 } 406 return this; 407 } 408 409 /** 410 * A canned URL protocol policy that allows <code>http</code>, 411 * <code>https</code>, and <code>mailto</code>. 412 */ 413 public HtmlPolicyBuilder allowStandardUrlProtocols() { 414 return allowUrlProtocols("http", "https", "mailto"); 415 } 416 417 /** 418 * Convert <code>style="<CSS>"</code> to sanitized CSS which allows 419 * color, font-size, type-face, and other styling using the default schema; 420 * but which does not allow content to escape its clipping context. 
421 */ 422 public HtmlPolicyBuilder allowStyling() { 423 allowStyling(CssSchema.DEFAULT); 424 return this; 425 } 426 427 /** 428 * Convert <code>style="<CSS>"</code> to sanitized CSS which allows 429 * color, font-size, type-face, and other styling using the given schema. 430 */ 431 public HtmlPolicyBuilder allowStyling(CssSchema whitelist) { 432 invalidateCompiledState(); 433 allowAttributesGlobally( 434 new StylingPolicy(whitelist), ImmutableList.of("style")); 435 return this; 436 } 437 438 /** 439 * Names of attributes from HTML 4 whose values are URLs. 440 * Other attributes, e.g. <code>style</code> may contain URLs even though 441 * there values are not URLs. 442 */ 443 private static final Set<String> URL_ATTRIBUTE_NAMES = ImmutableSet.of( 444 "action", "archive", "background", "cite", "classid", "codebase", "data", 445 "dsync", "formaction", "href", "icon", "longdesc", "manifest", "poster", 446 "profile", "src", "usemap"); 447 448 /** 449 * Produces a policy based on the allow and disallow calls previously made. 450 * 451 * @param out receives calls to open only tags allowed by 452 * previous calls to this object. 453 * Typically a {@link HtmlStreamRenderer}. 454 */ 455 public HtmlSanitizer.Policy build(HtmlStreamEventReceiver out) { 456 return toFactory().apply(out); 457 } 458 459 /** 460 * Produces a policy based on the allow and disallow calls previously made. 461 * 462 * @param out receives calls to open only tags allowed by 463 * previous calls to this object. 464 * Typically a {@link HtmlStreamRenderer}. 465 * @param listener is notified of dropped tags and attributes so that 466 * intrusion detection systems can be alerted to questionable HTML. 467 * If {@code null} then no notifications are sent. 468 * @param context if {@code (listener != null)} then the context value passed 469 * with alerts. This can be used to let the listener know from which 470 * connection or request the questionable HTML was received. 
471 */ 472 public <CTX> HtmlSanitizer.Policy build( 473 HtmlStreamEventReceiver out, 474 @Nullable HtmlChangeListener<? super CTX> listener, 475 @Nullable CTX context) { 476 return toFactory().apply(out, listener, context); 477 } 478 479 /** 480 * Like {@link #build} but can be reused to create many different policies 481 * each backed by a different output channel. 482 */ 483 public PolicyFactory toFactory() { 484 ImmutableSet.Builder<String> textContainers = ImmutableSet.builder(); 485 for (Map.Entry<String, Boolean> textContainer 486 : this.textContainers.entrySet()) { 487 if (Boolean.TRUE.equals(textContainer.getValue())) { 488 textContainers.add(textContainer.getKey()); 489 } 490 } 491 return new PolicyFactory(compilePolicies(), textContainers.build()); 492 } 493 494 // Speed up subsequent builds by caching the compiled policies. 495 private transient ImmutableMap<String, ElementAndAttributePolicies> 496 compiledPolicies; 497 498 /** Called by mutators to signal that any compiled policy is out-of-date. */ 499 private void invalidateCompiledState() { 500 compiledPolicies = null; 501 } 502 503 private ImmutableMap<String, ElementAndAttributePolicies> compilePolicies() { 504 if (compiledPolicies != null) { return compiledPolicies; } 505 506 // Copy maps before normalizing in case builder is reused. 
507 Map<String, ElementPolicy> elPolicies 508 = Maps.newLinkedHashMap(this.elPolicies); 509 Map<String, Map<String, AttributePolicy>> attrPolicies 510 = Maps.newLinkedHashMap(this.attrPolicies); 511 for (Map.Entry<String, Map<String, AttributePolicy>> e : 512 attrPolicies.entrySet()) { 513 e.setValue(Maps.newLinkedHashMap(e.getValue())); 514 } 515 Map<String, AttributePolicy> globalAttrPolicies 516 = Maps.newLinkedHashMap(this.globalAttrPolicies); 517 Set<String> allowedProtocols = ImmutableSet.copyOf(this.allowedProtocols); 518 519 // Implement requireRelNofollowOnLinks 520 if (requireRelNofollowOnLinks) { 521 ElementPolicy linkPolicy = elPolicies.get("a"); 522 if (linkPolicy == null) { 523 linkPolicy = ElementPolicy.REJECT_ALL_ELEMENT_POLICY; 524 } 525 elPolicies.put( 526 "a", 527 ElementPolicy.Util.join( 528 linkPolicy, 529 new ElementPolicy() { 530 public String apply(String elementName, List<String> attrs) { 531 for (int i = 0, n = attrs.size(); i < n; i += 2) { 532 if ("href".equals(attrs.get(i))) { 533 attrs.add("rel"); 534 attrs.add("nofollow"); 535 break; 536 } 537 } 538 return elementName; 539 } 540 })); 541 } 542 543 // Implement protocol policies. 544 // For each URL attribute that is allowed, we further constrain it by 545 // only allowing the value through if it specifies no protocol, or if it 546 // specifies one in the allowedProtocols white-list. 547 // This is done regardless of whether any protocols have been allowed, so 548 // allowing the attribute "href" globally with the identity policy but 549 // not white-listing any protocols, effectively disallows the "href" 550 // attribute globally. 
551 { 552 AttributePolicy urlAttributePolicy; 553 if (allowedProtocols.size() == 3 554 && allowedProtocols.contains("mailto") 555 && allowedProtocols.contains("http") 556 && allowedProtocols.contains("https")) { 557 urlAttributePolicy = StandardUrlAttributePolicy.INSTANCE; 558 } else { 559 urlAttributePolicy = new FilterUrlByProtocolAttributePolicy( 560 allowedProtocols); 561 } 562 Set<String> toGuard = Sets.newLinkedHashSet(URL_ATTRIBUTE_NAMES); 563 for (String urlAttributeName : URL_ATTRIBUTE_NAMES) { 564 if (globalAttrPolicies.containsKey(urlAttributeName)) { 565 toGuard.remove(urlAttributeName); 566 globalAttrPolicies.put(urlAttributeName, AttributePolicy.Util.join( 567 urlAttributePolicy, globalAttrPolicies.get(urlAttributeName))); 568 } 569 } 570 // Implement guards not implemented on global policies in the per-element 571 // policy maps. 572 for (Map.Entry<String, Map<String, AttributePolicy>> e 573 : attrPolicies.entrySet()) { 574 Map<String, AttributePolicy> policies = e.getValue(); 575 for (String urlAttributeName : toGuard) { 576 if (policies.containsKey(urlAttributeName)) { 577 policies.put(urlAttributeName, AttributePolicy.Util.join( 578 urlAttributePolicy, policies.get(urlAttributeName))); 579 } 580 } 581 } 582 } 583 584 ImmutableMap.Builder<String, ElementAndAttributePolicies> policiesBuilder 585 = ImmutableMap.builder(); 586 for (Map.Entry<String, ElementPolicy> e : elPolicies.entrySet()) { 587 String elementName = e.getKey(); 588 ElementPolicy elPolicy = e.getValue(); 589 if (ElementPolicy.REJECT_ALL_ELEMENT_POLICY.equals(elPolicy)) { 590 continue; 591 } 592 593 Map<String, AttributePolicy> elAttrPolicies 594 = attrPolicies.get(elementName); 595 if (elAttrPolicies == null) { elAttrPolicies = ImmutableMap.of(); } 596 ImmutableMap.Builder<String, AttributePolicy> attrs 597 = ImmutableMap.builder(); 598 for (Map.Entry<String, AttributePolicy> ape : elAttrPolicies.entrySet()) { 599 String attributeName = ape.getKey(); 600 if 
(globalAttrPolicies.containsKey(attributeName)) { continue; } 601 AttributePolicy policy = ape.getValue(); 602 if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) { 603 attrs.put(attributeName, policy); 604 } 605 } 606 for (Map.Entry<String, AttributePolicy> ape 607 : globalAttrPolicies.entrySet()) { 608 String attributeName = ape.getKey(); 609 AttributePolicy policy = AttributePolicy.Util.join( 610 elAttrPolicies.get(attributeName), ape.getValue()); 611 if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) { 612 attrs.put(attributeName, policy); 613 } 614 } 615 616 policiesBuilder.put( 617 elementName, 618 new ElementAndAttributePolicies( 619 elementName, 620 elPolicy, attrs.build(), skipIfEmpty.contains(elementName))); 621 } 622 return compiledPolicies = policiesBuilder.build(); 623 } 624 625 /** 626 * Builds the relationship between attributes, the values that they may have, 627 * and the elements on which they may appear. 628 * 629 * @author Mike Samuel 630 */ 631 public final class AttributeBuilder { 632 private final List<String> attributeNames; 633 private AttributePolicy policy = AttributePolicy.IDENTITY_ATTRIBUTE_POLICY; 634 635 AttributeBuilder(List<? extends String> attributeNames) { 636 this.attributeNames = ImmutableList.copyOf(attributeNames); 637 } 638 639 /** 640 * Filters and/or transforms the attribute values 641 * allowed by later {@code allow*} calls. 642 * Multiple calls to {@code matching} are combined so that the policies 643 * receive the value in order, each seeing the value after any 644 * transformation by a previous policy. 645 */ 646 public AttributeBuilder matching(AttributePolicy policy) { 647 this.policy = AttributePolicy.Util.join(this.policy, policy); 648 return this; 649 } 650 651 /** 652 * Restrict the values allowed by later {@code allow*} calls to those 653 * matching the pattern. 654 * Multiple calls to {@code matching} are combined to restrict to the 655 * intersection of possible matched values. 
656 */ 657 public AttributeBuilder matching(final Pattern pattern) { 658 return matching(new AttributePolicy() { 659 public @Nullable String apply( 660 String elementName, String attributeName, String value) { 661 return pattern.matcher(value).matches() ? value : null; 662 } 663 }); 664 } 665 666 /** 667 * Restrict the values allowed by later {@code allow*} calls to those 668 * matching the given predicate. 669 * Multiple calls to {@code matching} are combined to restrict to the 670 * intersection of possible matched values. 671 */ 672 public AttributeBuilder matching( 673 final Predicate<? super String> filter) { 674 return matching(new AttributePolicy() { 675 public @Nullable String apply( 676 String elementName, String attributeName, String value) { 677 return filter.apply(value) ? value : null; 678 } 679 }); 680 } 681 682 /** 683 * Restrict the values allowed by later {@code allow*} calls to those 684 * supplied. 685 * Multiple calls to {@code matching} are combined to restrict to the 686 * intersection of possible matched values. 687 */ 688 public AttributeBuilder matching( 689 boolean ignoreCase, String... allowedValues) { 690 return matching(ignoreCase, ImmutableSet.copyOf(allowedValues)); 691 } 692 693 /** 694 * Restrict the values allowed by later {@code allow*} calls to those 695 * supplied. 696 * Multiple calls to {@code matching} are combined to restrict to the 697 * intersection of possible matched values. 698 */ 699 public AttributeBuilder matching( 700 final boolean ignoreCase, Set<? extends String> allowedValues) { 701 final ImmutableSet<String> allowed = ImmutableSet.copyOf(allowedValues); 702 return matching(new AttributePolicy() { 703 public @Nullable String apply( 704 String elementName, String attributeName, String value) { 705 if (ignoreCase) { value = Strings.toLowerCase(value); } 706 return allowed.contains(value) ? 
value : null; 707 } 708 }); 709 } 710 711 /** 712 * Allows the given attributes on any elements but filters the 713 * attributes' values based on previous calls to {@code matching(...)}. 714 * Global attribute policies are applied after element specific policies. 715 * Be careful of using this with attributes like <code>type</code> which 716 * have different meanings on different attributes. 717 * Also be careful of allowing globally attributes like <code>href</code> 718 * which can have more far-reaching effects on tags like 719 * <code><base></code> and <code><link></code> than on 720 * <code><a></code> because in the former, they have an effect without 721 * user interaction and can change the behavior of the current page. 722 */ 723 public HtmlPolicyBuilder globally() { 724 return HtmlPolicyBuilder.this.allowAttributesGlobally( 725 policy, attributeNames); 726 } 727 728 /** 729 * Allows the named attributes on the given elements but filters the 730 * attributes' values based on previous calls to {@code matching(...)}. 731 */ 732 public HtmlPolicyBuilder onElements(String... elementNames) { 733 ImmutableList.Builder<String> b = ImmutableList.builder(); 734 for (String elementName : elementNames) { 735 b.add(HtmlLexer.canonicalName(elementName)); 736 } 737 return HtmlPolicyBuilder.this.allowAttributesOnElements( 738 policy, attributeNames, b.build()); 739 } 740 } 741 }