blob: c7a6301f1210ac231f84dd7af6406285ace52c8b [file] [log] [blame]
ulan@chromium.org2e04b582013-02-21 14:06:02 +00001// Copyright 2013 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_URI_H_
29#define V8_URI_H_
30
31#include "v8.h"
32
33#include "string-search.h"
34#include "v8utils.h"
35#include "v8conversions.h"
36
37namespace v8 {
38namespace internal {
39
40
41template <typename Char>
42static INLINE(Vector<const Char> GetCharVector(Handle<String> string));
43
44
45template <>
46Vector<const uint8_t> GetCharVector(Handle<String> string) {
47 String::FlatContent flat = string->GetFlatContent();
48 ASSERT(flat.IsAscii());
49 return flat.ToOneByteVector();
50}
51
52
53template <>
54Vector<const uc16> GetCharVector(Handle<String> string) {
55 String::FlatContent flat = string->GetFlatContent();
56 ASSERT(flat.IsTwoByte());
57 return flat.ToUC16Vector();
58}
59
60
61class URIUnescape : public AllStatic {
62 public:
63 template<typename Char>
64 static Handle<String> Unescape(Isolate* isolate, Handle<String> source);
65
66 private:
67 static const signed char kHexValue['g'];
68
69 template<typename Char>
70 static Handle<String> UnescapeSlow(
71 Isolate* isolate, Handle<String> string, int start_index);
72
73 static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
74
75 template <typename Char>
76 static INLINE(int UnescapeChar(Vector<const Char> vector,
77 int i,
78 int length,
79 int* step));
80};
81
82
83const signed char URIUnescape::kHexValue[] = {
84 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
85 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
86 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
87 -0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
88 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
89 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
90 -1, 10, 11, 12, 13, 14, 15 };
91
92
93template<typename Char>
94Handle<String> URIUnescape::Unescape(Isolate* isolate, Handle<String> source) {
95 int index;
96 { AssertNoAllocation no_allocation;
97 StringSearch<uint8_t, Char> search(isolate, STATIC_ASCII_VECTOR("%"));
98 index = search.Search(GetCharVector<Char>(source), 0);
99 if (index < 0) return source;
100 }
101 return UnescapeSlow<Char>(isolate, source, index);
102}
103
104
105template <typename Char>
106Handle<String> URIUnescape::UnescapeSlow(
107 Isolate* isolate, Handle<String> string, int start_index) {
108 bool one_byte = true;
109 int length = string->length();
110
111 int unescaped_length = 0;
112 { AssertNoAllocation no_allocation;
113 Vector<const Char> vector = GetCharVector<Char>(string);
114 for (int i = start_index; i < length; unescaped_length++) {
115 int step;
116 if (UnescapeChar(vector, i, length, &step) >
117 String::kMaxOneByteCharCode) {
118 one_byte = false;
119 }
120 i += step;
121 }
122 }
123
124 ASSERT(start_index < length);
125 Handle<String> first_part =
126 isolate->factory()->NewProperSubString(string, 0, start_index);
127
128 int dest_position = 0;
129 Handle<String> second_part;
130 if (one_byte) {
131 Handle<SeqOneByteString> dest =
132 isolate->factory()->NewRawOneByteString(unescaped_length);
133 AssertNoAllocation no_allocation;
134 Vector<const Char> vector = GetCharVector<Char>(string);
135 for (int i = start_index; i < length; dest_position++) {
136 int step;
137 dest->SeqOneByteStringSet(dest_position,
138 UnescapeChar(vector, i, length, &step));
139 i += step;
140 }
141 second_part = dest;
142 } else {
143 Handle<SeqTwoByteString> dest =
144 isolate->factory()->NewRawTwoByteString(unescaped_length);
145 AssertNoAllocation no_allocation;
146 Vector<const Char> vector = GetCharVector<Char>(string);
147 for (int i = start_index; i < length; dest_position++) {
148 int step;
149 dest->SeqTwoByteStringSet(dest_position,
150 UnescapeChar(vector, i, length, &step));
151 i += step;
152 }
153 second_part = dest;
154 }
155 return isolate->factory()->NewConsString(first_part, second_part);
156}
157
158
159int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
160 if (character1 > 'f') return -1;
161 int hi = kHexValue[character1];
162 if (hi == -1) return -1;
163 if (character2 > 'f') return -1;
164 int lo = kHexValue[character2];
165 if (lo == -1) return -1;
166 return (hi << 4) + lo;
167}
168
169
170template <typename Char>
171int URIUnescape::UnescapeChar(Vector<const Char> vector,
172 int i,
173 int length,
174 int* step) {
175 uint16_t character = vector[i];
176 int32_t hi = 0;
177 int32_t lo = 0;
178 if (character == '%' &&
179 i <= length - 6 &&
180 vector[i + 1] == 'u' &&
181 (hi = TwoDigitHex(vector[i + 2],
182 vector[i + 3])) != -1 &&
183 (lo = TwoDigitHex(vector[i + 4],
184 vector[i + 5])) != -1) {
185 *step = 6;
186 return (hi << 8) + lo;
187 } else if (character == '%' &&
188 i <= length - 3 &&
189 (lo = TwoDigitHex(vector[i + 1],
190 vector[i + 2])) != -1) {
191 *step = 3;
192 return lo;
193 } else {
194 *step = 1;
195 return character;
196 }
197}
198
199
200class URIEscape : public AllStatic {
201 public:
202 template<typename Char>
203 static Handle<String> Escape(Isolate* isolate, Handle<String> string);
204
205 private:
206 static const char kHexChars[17];
207 static const char kNotEscaped[256];
208
209 static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
210};
211
212
213const char URIEscape::kHexChars[] = "0123456789ABCDEF";
214
215
216// kNotEscaped is generated by the following:
217//
218// #!/bin/perl
219// for (my $i = 0; $i < 256; $i++) {
220// print "\n" if $i % 16 == 0;
221// my $c = chr($i);
222// my $escaped = 1;
223// $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
224// print $escaped ? "0, " : "1, ";
225// }
226
227const char URIEscape::kNotEscaped[] = {
228 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
229 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
230 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
231 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
232 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
233 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
234 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
235 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
236 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
237 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
238 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
239 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
240 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
241 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
242 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
243 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
244
245
246template<typename Char>
247Handle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) {
248 ASSERT(string->IsFlat());
249 int escaped_length = 0;
250 int length = string->length();
251
252 { AssertNoAllocation no_allocation;
253 Vector<const Char> vector = GetCharVector<Char>(string);
254 for (int i = 0; i < length; i++) {
255 uint16_t c = vector[i];
256 if (c >= 256) {
257 escaped_length += 6;
258 } else if (IsNotEscaped(c)) {
259 escaped_length++;
260 } else {
261 escaped_length += 3;
262 }
263
264 // We don't allow strings that are longer than a maximal length.
265 ASSERT(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow.
266 if (escaped_length > String::kMaxLength) {
267 isolate->context()->mark_out_of_memory();
268 return Handle<String>::null();
269 }
270 }
271 }
272
273 // No length change implies no change. Return original string if no change.
274 if (escaped_length == length) return string;
275
276 Handle<SeqOneByteString> dest =
277 isolate->factory()->NewRawOneByteString(escaped_length);
278 int dest_position = 0;
279
280 { AssertNoAllocation no_allocation;
281 Vector<const Char> vector = GetCharVector<Char>(string);
282 for (int i = 0; i < length; i++) {
283 uint16_t c = vector[i];
284 if (c >= 256) {
285 dest->SeqOneByteStringSet(dest_position, '%');
286 dest->SeqOneByteStringSet(dest_position+1, 'u');
287 dest->SeqOneByteStringSet(dest_position+2, kHexChars[c >> 12]);
288 dest->SeqOneByteStringSet(dest_position+3, kHexChars[(c >> 8) & 0xf]);
289 dest->SeqOneByteStringSet(dest_position+4, kHexChars[(c >> 4) & 0xf]);
290 dest->SeqOneByteStringSet(dest_position+5, kHexChars[c & 0xf]);
291 dest_position += 6;
292 } else if (IsNotEscaped(c)) {
293 dest->SeqOneByteStringSet(dest_position, c);
294 dest_position++;
295 } else {
296 dest->SeqOneByteStringSet(dest_position, '%');
297 dest->SeqOneByteStringSet(dest_position+1, kHexChars[c >> 4]);
298 dest->SeqOneByteStringSet(dest_position+2, kHexChars[c & 0xf]);
299 dest_position += 3;
300 }
301 }
302 }
303
304 return dest;
305}
306
307} } // namespace v8::internal
308
309#endif // V8_URI_H_