blob: 0dfe7652ad418775fef22c235b48784eae570a99 [file] [log] [blame]
// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
// This file contains support for URI manipulations written in
// JavaScript.

// Expect $String = global.String;
32
// Writes the percent-escape "%XX" for one octet into the char code buffer.
//
//   octet:  value whose high and low nibbles select the two hex digits.
//   result: array of char codes that receives the three new entries.
//   index:  position in |result| at which the '%' is written.
//
// Returns the index just past the last char code written (index + 3).
//
// Reads the file-level hexCharCodeArray table, which starts out as 0 and is
// filled in by URIEncodeOctets; this function does not initialize it itself.
function URIAddEncodedOctetToBuffer(octet, result, index) {
  result[index++] = 37; // Char code of '%'.
  result[index++] = hexCharCodeArray[octet >> 4];   // High nibble.
  result[index++] = hexCharCodeArray[octet & 0x0F]; // Low nibble.
  return index;
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000039
40
// Appends the percent-escaped form of up to four octets to |result|.
//
//   octets: array holding the octets of one encoded character; entry 0 is
//           always written, entries 1..3 are written only when truthy
//           (so absent entries are skipped).
//   result: array of char codes being built.
//   index:  position in |result| at which to start writing.
//
// Returns the index just past the last char code written.
function URIEncodeOctets(octets, result, index) {
  // Build the table of upper-case hex digit char codes on first use.
  if (hexCharCodeArray === 0) {
    hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                        65, 66, 67, 68, 69, 70];
  }
  index = URIAddEncodedOctetToBuffer(octets[0], result, index);
  if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index);
  if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index);
  if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index);
  return index;
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000052
53
// Percent-encodes a single UTF-16 code unit that is not part of a
// surrogate pair, using its one-, two- or three-octet UTF-8 form.
//
//   cc:     char code to encode.
//   result: array of char codes being built.
//   index:  position in |result| at which to start writing.
//
// Returns the index just past the last char code written.
function URIEncodeSingle(cc, result, index) {
  var x = (cc >> 12) & 0xF;  // Bits 12..15.
  var y = (cc >> 6) & 63;    // Bits 6..11.
  var z = cc & 63;           // Bits 0..5.
  var octets = new $Array(3);
  if (cc <= 0x007F) {
    // 0xxxxxxx
    octets[0] = cc;
  } else if (cc <= 0x07FF) {
    // 110yyyyy 10zzzzzz
    octets[0] = y + 192;
    octets[1] = z + 128;
  } else {
    // 1110xxxx 10yyyyyy 10zzzzzz
    octets[0] = x + 224;
    octets[1] = y + 128;
    octets[2] = z + 128;
  }
  return URIEncodeOctets(octets, result, index);
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000071
72
// Percent-encodes a surrogate pair as the four-octet UTF-8 form of the
// code point it represents.
//
//   cc1:    first code unit of the pair (callers in this file pass a value
//           in 0xD800..0xDBFF).
//   cc2:    second code unit of the pair (callers in this file pass a value
//           in 0xDC00..0xDFFF).
//   result: array of char codes being built.
//   index:  position in |result| at which to start writing.
//
// Returns the index just past the last char code written.
function URIEncodePair(cc1, cc2, result, index) {
  // Bits 6..9 of cc1 plus one give the five top bits (uuuuu) of the code
  // point; the remaining bits are copied through unchanged.
  var u = ((cc1 >> 6) & 0xF) + 1;
  var w = (cc1 >> 2) & 0xF;
  var x = cc1 & 3;
  var y = (cc2 >> 6) & 0xF;
  var z = cc2 & 63;
  var octets = new $Array(4);
  // 11110uuu 10uuwwww 10xxyyyy 10zzzzzz
  octets[0] = (u >> 2) + 240;
  octets[1] = (((u & 3) << 4) | w) + 128;
  octets[2] = ((x << 4) | y) + 128;
  octets[3] = z + 128;
  return URIEncodeOctets(octets, result, index);
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000086
87
// Converts the two hex digit characters of a "%XX" escape to the octet
// value they denote.
//
//   ch1: string whose first character is the high hex digit.
//   ch2: string whose first character is the low hex digit.
//
// Returns the value (high << 4) | low.
// Throws a URIError if HexValueOf reports either character as not a hex
// digit (it returns -1 in that case).
function URIHexCharsToCharCode(ch1, ch2) {
  // Look each digit up once and combine the values directly instead of
  // validating first and then re-parsing the concatenated string.
  var high = HexValueOf(ch1);
  var low = HexValueOf(ch2);
  if (high == -1 || low == -1) {
    throw new $URIError("URI malformed");
  }
  return (high << 4) | low;
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000094
95
// Decodes one UTF-8 octet sequence and appends the resulting UTF-16 code
// unit(s) to |result|.
//
//   octets: array holding the octets of one sequence; octets[0] is the
//           lead octet and selects how many further entries are read.
//   result: array of char codes being built.
//   index:  position in |result| at which to start writing.
//
// Returns the index just past the last char code written (one entry for
// values below 0x10000, two entries - a surrogate pair - otherwise).
//
// Throws a URIError for an invalid lead octet, a continuation octet outside
// 0x80..0xbf, an overlong encoding, an encoded surrogate (0xD800..0xDFFF),
// or a value above 0x10ffff.
function URIDecodeOctets(octets, result, index) {
  var value;
  var o0 = octets[0];
  if (o0 < 0x80) {
    // Single octet: 0xxxxxxx.
    value = o0;
  } else if (o0 < 0xc2) {
    // 0x80..0xbf are continuation octets; 0xc0 and 0xc1 can only start
    // overlong two-octet forms.
    throw new $URIError("URI malformed");
  } else {
    var o1 = octets[1];
    if (o0 < 0xe0) {
      // Two octets: 110aaaaa 10bbbbbb.
      var a = o0 & 0x1f;
      if ((o1 < 0x80) || (o1 > 0xbf)) {
        throw new $URIError("URI malformed");
      }
      var b = o1 & 0x3f;
      value = (a << 6) + b;
      if (value < 0x80 || value > 0x7ff) {
        throw new $URIError("URI malformed");
      }
    } else {
      var o2 = octets[2];
      if (o0 < 0xf0) {
        // Three octets: 1110aaaa 10bbbbbb 10cccccc.
        var a = o0 & 0x0f;
        if ((o1 < 0x80) || (o1 > 0xbf)) {
          throw new $URIError("URI malformed");
        }
        var b = o1 & 0x3f;
        if ((o2 < 0x80) || (o2 > 0xbf)) {
          throw new $URIError("URI malformed");
        }
        var c = o2 & 0x3f;
        value = (a << 12) + (b << 6) + c;
        if ((value < 0x800) || (value > 0xffff)) {
          throw new $URIError("URI malformed");
        }
        // A surrogate code unit encoded on its own is not valid UTF-8.
        if ((value >= 0xd800) && (value <= 0xdfff)) {
          throw new $URIError("URI malformed");
        }
      } else {
        var o3 = octets[3];
        if (o0 < 0xf8) {
          // Four octets: 11110aaa 10bbbbbb 10cccccc 10dddddd.
          var a = (o0 & 0x07);
          if ((o1 < 0x80) || (o1 > 0xbf)) {
            throw new $URIError("URI malformed");
          }
          var b = (o1 & 0x3f);
          if ((o2 < 0x80) || (o2 > 0xbf)) {
            throw new $URIError("URI malformed");
          }
          var c = (o2 & 0x3f);
          if ((o3 < 0x80) || (o3 > 0xbf)) {
            throw new $URIError("URI malformed");
          }
          var d = (o3 & 0x3f);
          value = (a << 18) + (b << 12) + (c << 6) + d;
          if ((value < 0x10000) || (value > 0x10ffff)) {
            throw new $URIError("URI malformed");
          }
        } else {
          // Lead octets 0xf8 and above never start a valid sequence.
          throw new $URIError("URI malformed");
        }
      }
    }
  }
  if (value < 0x10000) {
    result[index++] = value;
    return index;
  } else {
    // Split into a surrogate pair. 0xd7c0 is 0xd800 - (0x10000 >> 10), so
    // the subtraction of 0x10000 is folded into the constant.
    result[index++] = (value >> 10) + 0xd7c0;
    result[index++] = (value & 0x3ff) + 0xdc00;
    return index;
  }
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000157
158
159// ECMA-262, section 15.1.3
160function Encode(uri, unescape) {
161 var uriLength = uri.length;
162 var result = new $Array(uriLength);
163 var index = 0;
164 for (var k = 0; k < uriLength; k++) {
165 var cc1 = uri.charCodeAt(k);
166 if (unescape(cc1)) {
167 result[index++] = cc1;
168 } else {
169 if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed");
170 if (cc1 < 0xD800 || cc1 > 0xDBFF) {
171 index = URIEncodeSingle(cc1, result, index);
172 } else {
173 k++;
174 if (k == uriLength) throw new $URIError("URI malformed");
175 var cc2 = uri.charCodeAt(k);
176 if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed");
177 index = URIEncodePair(cc1, cc2, result, index);
178 }
179 }
180 }
181 return %StringFromCharCodeArray(result);
kasperl@chromium.org41044eb2008-10-06 08:24:46 +0000182}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000183
184
185// ECMA-262, section 15.1.3
186function Decode(uri, reserved) {
187 var uriLength = uri.length;
188 var result = new $Array(uriLength);
189 var index = 0;
190 for (var k = 0; k < uriLength; k++) {
191 var ch = uri.charAt(k);
192 if (ch == '%') {
193 if (k + 2 >= uriLength) throw new $URIError("URI malformed");
194 var cc = URIHexCharsToCharCode(uri.charAt(++k), uri.charAt(++k));
195 if (cc >> 7) {
196 var n = 0;
197 while (((cc << ++n) & 0x80) != 0) ;
198 if (n == 1 || n > 4) throw new $URIError("URI malformed");
199 var octets = new $Array(n);
200 octets[0] = cc;
201 if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed");
202 for (var i = 1; i < n; i++) {
203 k++;
204 octets[i] = URIHexCharsToCharCode(uri.charAt(++k), uri.charAt(++k));
205 }
206 index = URIDecodeOctets(octets, result, index);
207 } else {
208 if (reserved(cc)) {
209 result[index++] = 37; // Char code of '%'.
210 result[index++] = uri.charCodeAt(k - 1);
211 result[index++] = uri.charCodeAt(k);
212 } else {
213 result[index++] = cc;
214 }
215 }
216 } else {
217 result[index++] = ch.charCodeAt(0);
218 }
219 }
220 result.length = index;
221 return %StringFromCharCodeArray(result);
kasperl@chromium.org41044eb2008-10-06 08:24:46 +0000222}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000223
224
// ECMA-262 - 15.1.3.1.
//
// Implementation of decodeURI: converts |uri| with ToString and runs Decode
// on it with a predicate that accepts the char codes of
// # $ & + , / : ; = ? @, so escapes of those characters are left as written.
function URIDecode(uri) {
  function reservedPredicate(cc) {
    // #$
    if (35 <= cc && cc <= 36) return true;
    // &
    if (cc == 38) return true;
    // +,
    if (43 <= cc && cc <= 44) return true;
    // /
    if (cc == 47) return true;
    // :;
    if (58 <= cc && cc <= 59) return true;
    // =
    if (cc == 61) return true;
    // ?@
    if (63 <= cc && cc <= 64) return true;

    return false;
  };
  var string = ToString(uri);
  return Decode(string, reservedPredicate);
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000248
249
// ECMA-262 - 15.1.3.2.
//
// Implementation of decodeURIComponent: converts |component| with ToString
// and runs Decode on it with a predicate that accepts nothing, so every
// single-octet escape is decoded.
function URIDecodeComponent(component) {
  function reservedPredicate(cc) { return false; };
  var string = ToString(component);
  return Decode(string, reservedPredicate);
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000256
257
// Does the char code correspond to an alpha-numeric char.
// Returns true for the char codes of a-z, A-Z and 0-9, false otherwise.
function isAlphaNumeric(cc) {
  // a - z
  if (97 <= cc && cc <= 122) return true;
  // A - Z
  if (65 <= cc && cc <= 90) return true;
  // 0 - 9
  if (48 <= cc && cc <= 57) return true;

  return false;
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000269
270
// ECMA-262 - 15.1.3.3.
//
// Implementation of encodeURI: converts |uri| with ToString and runs Encode
// on it with a predicate that leaves alpha-numerics and the characters
// ! # $ & ' ( ) * + , - . / : ; = ? @ _ ~ unescaped.
function URIEncode(uri) {
  function unescapePredicate(cc) {
    if (isAlphaNumeric(cc)) return true;
    // !
    if (cc == 33) return true;
    // #$
    if (35 <= cc && cc <= 36) return true;
    // &'()*+,-./
    if (38 <= cc && cc <= 47) return true;
    // :;
    if (58 <= cc && cc <= 59) return true;
    // =
    if (cc == 61) return true;
    // ?@
    if (63 <= cc && cc <= 64) return true;
    // _
    if (cc == 95) return true;
    // ~
    if (cc == 126) return true;

    return false;
  };

  var string = ToString(uri);
  return Encode(string, unescapePredicate);
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000298
299
// ECMA-262 - 15.1.3.4
//
// Implementation of encodeURIComponent: converts |component| with ToString
// and runs Encode on it with a predicate that leaves only alpha-numerics
// and the characters ! ' ( ) * - . _ ~ unescaped.
function URIEncodeComponent(component) {
  function unescapePredicate(cc) {
    if (isAlphaNumeric(cc)) return true;
    // !
    if (cc == 33) return true;
    // '()*
    if (39 <= cc && cc <= 42) return true;
    // -.
    if (45 <= cc && cc <= 46) return true;
    // _
    if (cc == 95) return true;
    // ~
    if (cc == 126) return true;

    return false;
  };

  var string = ToString(component);
  return Encode(string, unescapePredicate);
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000321
322
// Lazily initialized lookup tables; 0 means "not built yet".
// hexCharArray is filled with the hex digit strings "0".."F" by
// CharCodeToHex4Str, and hexCharCodeArray with the matching char codes by
// URIEncodeOctets, the first time each of those functions runs.
var hexCharArray = 0;
var hexCharCodeArray = 0;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000326
327
// Returns the numeric value (0..15) of the hex digit that is the first
// character of the string |c|, accepting both upper and lower case, or -1
// if that character is not a hex digit. An empty string also yields -1,
// because its char code is NaN and fails every range test.
function HexValueOf(c) {
  var code = c.charCodeAt(0);

  // 0-9
  if (code >= 48 && code <= 57) return code - 48;
  // A-F
  if (code >= 65 && code <= 70) return code - 55;
  // a-f
  if (code >= 97 && code <= 102) return code - 87;

  return -1;
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000340
341
// Convert a character code to 4-digit hex string representation
// 64 -> 0040, 62234 -> F31A.
//
// Only the low 16 bits of |cc| contribute; digits are upper case and the
// result is always exactly four characters long.
function CharCodeToHex4Str(cc) {
  var r = "";
  // Build the table of hex digit strings on first use.
  if (hexCharArray === 0) {
    hexCharArray = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                    "A", "B", "C", "D", "E", "F"];
  }
  // Emit the least significant nibble first and prepend each new digit.
  for (var i = 0; i < 4; ++i) {
    var c = hexCharArray[cc & 0x0F];
    r = c + r;
    cc = cc >>> 4;
  }
  return r;
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000357
358
// Converts hex string to char code. Not efficient.
//
// Walks |s| from its last character to its first, adding each digit's
// value shifted left by four more bits than the previous one. An empty
// string gives 0. Digits are not validated here: HexValueOf returns -1 for
// a non-hex character and that value is folded into the sum, so callers
// must check the input themselves.
function HexStrToCharCode(s) {
  var m = 0;
  var r = 0;
  for (var i = s.length - 1; i >= 0; --i) {
    r = r + (HexValueOf(s.charAt(i)) << m);
    m = m + 4;
  }
  return r;
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000369
370
// Returns true if all digits in string s are valid hex numbers
// ('0'..'9', 'A'..'F' and 'a'..'f'). An empty string has no invalid
// digits and therefore also returns true.
function IsValidHex(s) {
  for (var i = 0; i < s.length; ++i) {
    var cc = s.charCodeAt(i);
    var isDigit = 48 <= cc && cc <= 57;
    var isUpperHex = 65 <= cc && cc <= 70;
    var isLowerHex = 97 <= cc && cc <= 102;
    if (!(isDigit || isUpperHex || isLowerHex)) return false;
  }
  return true;
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000383
384
385// ECMA-262 - B.2.1.
386function URIEscape(str) {
387 var s = ToString(str);
388 return %URIEscape(s);
kasperl@chromium.org41044eb2008-10-06 08:24:46 +0000389}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000390
391
392// ECMA-262 - B.2.2.
393function URIUnescape(str) {
394 var s = ToString(str);
395 return %URIUnescape(s);
396}
397
398
399// -------------------------------------------------------------------
400
// Registers the six URI functions of this file with the global object.
// The list passed to InstallFunctions alternates the public name and the
// function implementing it.
function SetupURI() {
  // Setup non-enumerable URI functions on the global object and set
  // their names.
  InstallFunctions(global, DONT_ENUM, $Array(
    "escape", URIEscape,
    "unescape", URIUnescape,
    "decodeURI", URIDecode,
    "decodeURIComponent", URIDecodeComponent,
    "encodeURI", URIEncode,
    "encodeURIComponent", URIEncodeComponent
  ));
}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000413
// Install the URI functions as soon as this file is loaded.
SetupURI();
415