// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| 4 | |
| 5 | package template |
| 6 | |
| 7 | import ( |
| 8 | "bytes" |
| 9 | "fmt" |
| 10 | "strings" |
| 11 | "unicode/utf8" |
| 12 | ) |
| 13 | |
| 14 | // htmlNospaceEscaper escapes for inclusion in unquoted attribute values. |
| 15 | func htmlNospaceEscaper(args ...interface{}) string { |
| 16 | s, t := stringify(args...) |
| 17 | if t == contentTypeHTML { |
| 18 | return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false) |
| 19 | } |
| 20 | return htmlReplacer(s, htmlNospaceReplacementTable, false) |
| 21 | } |
| 22 | |
| 23 | // attrEscaper escapes for inclusion in quoted attribute values. |
| 24 | func attrEscaper(args ...interface{}) string { |
| 25 | s, t := stringify(args...) |
| 26 | if t == contentTypeHTML { |
| 27 | return htmlReplacer(stripTags(s), htmlNormReplacementTable, true) |
| 28 | } |
| 29 | return htmlReplacer(s, htmlReplacementTable, true) |
| 30 | } |
| 31 | |
| 32 | // rcdataEscaper escapes for inclusion in an RCDATA element body. |
| 33 | func rcdataEscaper(args ...interface{}) string { |
| 34 | s, t := stringify(args...) |
| 35 | if t == contentTypeHTML { |
| 36 | return htmlReplacer(s, htmlNormReplacementTable, true) |
| 37 | } |
| 38 | return htmlReplacer(s, htmlReplacementTable, true) |
| 39 | } |
| 40 | |
| 41 | // htmlEscaper escapes for inclusion in HTML text. |
| 42 | func htmlEscaper(args ...interface{}) string { |
| 43 | s, t := stringify(args...) |
| 44 | if t == contentTypeHTML { |
| 45 | return s |
| 46 | } |
| 47 | return htmlReplacer(s, htmlReplacementTable, true) |
| 48 | } |
| 49 | |
// htmlReplacementTable contains the runes that need to be escaped
// inside a quoted attribute value or in a text node.
// It is indexed by rune value; an empty entry means "no replacement".
var htmlReplacementTable = []string{
	// http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
	// U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
	// CHARACTER character to the current attribute's value.
	// "
	// and similarly
	// http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
	0:    "\uFFFD",
	'"':  "&#34;",
	'&':  "&amp;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'>':  "&gt;",
}
| 67 | |
// htmlNormReplacementTable is like htmlReplacementTable but without '&' to
// avoid over-encoding existing entities.
// It is indexed by rune value; an empty entry means "no replacement".
var htmlNormReplacementTable = []string{
	0:    "\uFFFD",
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'>':  "&gt;",
}
| 78 | |
// htmlNospaceReplacementTable contains the runes that need to be escaped
// inside an unquoted attribute value.
// It is indexed by rune value; an empty entry means "no replacement".
// The set of runes escaped is the union of the HTML specials and
// those determined by running the JS below in browsers:
// <div id=d></div>
// <script>(function () {
// var a = [], d = document.getElementById("d"), i, c, s;
// for (i = 0; i < 0x10000; ++i) {
//   c = String.fromCharCode(i);
//   d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
//   s = d.getElementsByTagName("SPAN")[0];
//   if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
// }
// document.write(a.join(", "));
// })()</script>
var htmlNospaceReplacementTable = []string{
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'&':  "&amp;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}
| 114 | |
// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
// without '&' to avoid over-encoding existing entities.
// It is indexed by rune value; an empty entry means "no replacement".
var htmlNospaceNormReplacementTable = []string{
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}
| 136 | |
// htmlReplacer returns s with runes replaced according to replacementTable.
//
// replacementTable is indexed by rune value; a non-empty entry is written in
// place of that rune. Runes beyond the end of the table are copied through
// unchanged when badRunes is true. When badRunes is false, runes in
// U+FDD0..U+FDEF and U+FFF0..U+FFFF are emitted as hexadecimal character
// references instead. If nothing needed replacing, s itself is returned.
//
// The input is decoded with utf8.DecodeRuneInString rather than `for range`
// so that the real encoded width of each rune is known. An invalid byte
// decodes to U+FFFD with width 1, whereas utf8.RuneLen(U+FFFD) is 3; using
// the latter to advance past the rune would overrun the input and panic.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	written, b := 0, new(bytes.Buffer)
	for i := 0; i < len(s); {
		r, width := utf8.DecodeRuneInString(s[i:])
		switch {
		case int(r) < len(replacementTable):
			if repl := replacementTable[r]; repl != "" {
				b.WriteString(s[written:i])
				b.WriteString(repl)
				written = i + width
			}
		case badRunes:
			// Bad runes are allowed through unescaped.
		case 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff:
			// IE does not allow these ranges in unquoted attrs.
			fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
			written = i + width
		}
		i += width
	}
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
| 164 | |
| 165 | // stripTags takes a snippet of HTML and returns only the text content. |
| 166 | // For example, `<b>¡Hi!</b> <script>...</script>` -> `¡Hi! `. |
| 167 | func stripTags(html string) string { |
| 168 | var b bytes.Buffer |
| 169 | s, c, i, allText := []byte(html), context{}, 0, true |
| 170 | // Using the transition funcs helps us avoid mangling |
| 171 | // `<div title="1>2">` or `I <3 Ponies!`. |
| 172 | for i != len(s) { |
| 173 | if c.delim == delimNone { |
| 174 | st := c.state |
| 175 | // Use RCDATA instead of parsing into JS or CSS styles. |
| 176 | if c.element != elementNone && !isInTag(st) { |
| 177 | st = stateRCDATA |
| 178 | } |
| 179 | d, nread := transitionFunc[st](c, s[i:]) |
| 180 | i1 := i + nread |
| 181 | if c.state == stateText || c.state == stateRCDATA { |
| 182 | // Emit text up to the start of the tag or comment. |
| 183 | j := i1 |
| 184 | if d.state != c.state { |
| 185 | for j1 := j - 1; j1 >= i; j1-- { |
| 186 | if s[j1] == '<' { |
| 187 | j = j1 |
| 188 | break |
| 189 | } |
| 190 | } |
| 191 | } |
| 192 | b.Write(s[i:j]) |
| 193 | } else { |
| 194 | allText = false |
| 195 | } |
| 196 | c, i = d, i1 |
| 197 | continue |
| 198 | } |
| 199 | i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim]) |
| 200 | if i1 < i { |
| 201 | break |
| 202 | } |
| 203 | if c.delim != delimSpaceOrTagEnd { |
| 204 | // Consume any quote. |
| 205 | i1++ |
| 206 | } |
| 207 | c, i = context{state: stateTag, element: c.element}, i1 |
| 208 | } |
| 209 | if allText { |
| 210 | return html |
| 211 | } else if c.state == stateText || c.state == stateRCDATA { |
| 212 | b.Write(s[i:]) |
| 213 | } |
| 214 | return b.String() |
| 215 | } |
| 216 | |
| 217 | // htmlNameFilter accepts valid parts of an HTML attribute or tag name or |
| 218 | // a known-safe HTML attribute. |
| 219 | func htmlNameFilter(args ...interface{}) string { |
| 220 | s, t := stringify(args...) |
| 221 | if t == contentTypeHTMLAttr { |
| 222 | return s |
| 223 | } |
| 224 | if len(s) == 0 { |
| 225 | // Avoid violation of structure preservation. |
| 226 | // <input checked {{.K}}={{.V}}>. |
| 227 | // Without this, if .K is empty then .V is the value of |
| 228 | // checked, but otherwise .V is the value of the attribute |
| 229 | // named .K. |
| 230 | return filterFailsafe |
| 231 | } |
| 232 | s = strings.ToLower(s) |
| 233 | if t := attrType(s); t != contentTypePlain { |
| 234 | // TODO: Split attr and element name part filters so we can whitelist |
| 235 | // attributes. |
| 236 | return filterFailsafe |
| 237 | } |
| 238 | for _, r := range s { |
| 239 | switch { |
| 240 | case '0' <= r && r <= '9': |
| 241 | case 'a' <= r && r <= 'z': |
| 242 | default: |
| 243 | return filterFailsafe |
| 244 | } |
| 245 | } |
| 246 | return s |
| 247 | } |
| 248 | |
// commentEscaper discards its arguments and always yields the empty string.
// Text interpolated into a comment maps to no parsed structure and to no
// human-readable output, so dropping it is the simplest and most secure
// policy. That holds whether or not static comment content is removed from
// the template.
func commentEscaper(args ...interface{}) string {
	const nothing = ""
	return nothing
}