001 // Copyright (c) 2011, Mike Samuel 002 // All rights reserved. 003 // 004 // Redistribution and use in source and binary forms, with or without 005 // modification, are permitted provided that the following conditions 006 // are met: 007 // 008 // Redistributions of source code must retain the above copyright 009 // notice, this list of conditions and the following disclaimer. 010 // Redistributions in binary form must reproduce the above copyright 011 // notice, this list of conditions and the following disclaimer in the 012 // documentation and/or other materials provided with the distribution. 013 // Neither the name of the OWASP nor the names of its contributors may 014 // be used to endorse or promote products derived from this software 015 // without specific prior written permission. 016 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 017 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 018 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 019 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 020 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 021 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 022 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 023 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 024 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 025 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 026 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 027 // POSSIBILITY OF SUCH DAMAGE. 028 029 package org.owasp.html; 030 031 import javax.annotation.Nullable; 032 033 import com.google.common.collect.ImmutableSet; 034 035 /** 036 * An attribute policy for attributes whose values are URLs that requires that 037 * the value have no protocol or have an allowed protocol. 038 * 039 * <p> 040 * URLs with protocols must match the protocol set passed to the constructor. 041 * URLs without protocols but which specify an origin different from the 042 * containing page (e.g. {@code //example.org}) are only allowed if the 043 * {@link FilterUrlByProtocolAttributePolicy#allowProtocolRelativeUrls policy} 044 * allows both {@code http} and {@code https} which are normally used to serve 045 * HTML. 046 * Same-origin URLs, URLs without any protocol or authority part are always 047 * allowed. 048 * </p> 049 * 050 * <p> 051 * This class assumes that URLs are either hierarchical, or are opaque, but 052 * do not look like they contain an authority portion. 053 * </p> 054 * 055 * @author Mike Samuel <mikesamuel@gmail.com> 056 */ 057 @TCB 058 public class FilterUrlByProtocolAttributePolicy implements AttributePolicy { 059 private final ImmutableSet<String> protocols; 060 061 public FilterUrlByProtocolAttributePolicy( 062 Iterable<? extends String> protocols) { 063 this.protocols = ImmutableSet.copyOf(protocols); 064 } 065 066 public @Nullable String apply( 067 String elementName, String attributeName, String s) { 068 protocol_loop: 069 for (int i = 0, n = s.length(); i < n; ++i) { 070 switch (s.charAt(i)) { 071 case '/': case '#': case '?': // No protocol. 072 // Check for domain relative URLs like //www.evil.org/ 073 if (s.startsWith("//") 074 // or the protocols by which HTML is normally served are OK. 075 && !allowProtocolRelativeUrls()) { 076 return null; 077 } 078 break protocol_loop; 079 case ':': 080 if (!protocols.contains(s.substring(i))) { return null; } 081 break protocol_loop; 082 } 083 } 084 return normalizeUri(s); 085 } 086 087 protected boolean allowProtocolRelativeUrls() { 088 return protocols.contains("http") && protocols.contains("https"); 089 } 090 091 /** Percent encodes anything that looks like a colon, or a parenthesis. */ 092 static String normalizeUri(String s) { 093 int n = s.length(); 094 boolean colonsIrrelevant = false; 095 for (int i = 0; i < n; ++i) { 096 char ch = s.charAt(i); 097 switch (ch) { 098 case '/': case '#': case '?': case ':': 099 colonsIrrelevant = true; 100 break; 101 case '(': case ')': case '\uff1a': 102 StringBuilder sb = new StringBuilder(n + 16); 103 int pos = 0; 104 for (; i < n; ++i) { 105 ch = s.charAt(i); 106 switch (ch) { 107 case '(': 108 sb.append(s, pos, i).append("%28"); 109 pos = i + 1; 110 break; 111 case ')': 112 sb.append(s, pos, i).append("%29"); 113 pos = i + 1; 114 break; 115 case '\uff1a': // Full-width colon. 116 if (!colonsIrrelevant) { 117 // TODO: do we need to encode non-colon characters if we're 118 // not dealing with URLs that haven't been copy/pasted into 119 // the URL bar? 120 // Is it safe to assume UTF-8 here? 121 sb.append(s, pos, i).append("%ef%bc%9a"); 122 pos = i + 1; 123 } 124 break; 125 } 126 } 127 return sb.append(s, pos, n).toString(); 128 } 129 } 130 return s; 131 } 132 133 }