blob: 477071ac78d2e09d6d56d5ac4818a858aed5a734 [file] [log] [blame]
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "src/v8.h"
6
7#include "src/arguments.h"
8#include "src/conversions.h"
9#include "src/runtime/runtime-utils.h"
10#include "src/string-search.h"
11#include "src/utils.h"
12
13
14namespace v8 {
15namespace internal {
16
17class URIUnescape : public AllStatic {
18 public:
19 template <typename Char>
20 MUST_USE_RESULT static MaybeHandle<String> Unescape(Isolate* isolate,
21 Handle<String> source);
22
23 private:
24 static const signed char kHexValue['g'];
25
26 template <typename Char>
27 MUST_USE_RESULT static MaybeHandle<String> UnescapeSlow(Isolate* isolate,
28 Handle<String> string,
29 int start_index);
30
31 static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
32
33 template <typename Char>
34 static INLINE(int UnescapeChar(Vector<const Char> vector, int i, int length,
35 int* step));
36};
37
38
39const signed char URIUnescape::kHexValue[] = {
40 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
41 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
42 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -0, 1, 2, 3, 4, 5,
43 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1,
44 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
45 -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15};
46
47
48template <typename Char>
49MaybeHandle<String> URIUnescape::Unescape(Isolate* isolate,
50 Handle<String> source) {
51 int index;
52 {
53 DisallowHeapAllocation no_allocation;
54 StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%"));
55 index = search.Search(source->GetCharVector<Char>(), 0);
56 if (index < 0) return source;
57 }
58 return UnescapeSlow<Char>(isolate, source, index);
59}
60
61
62template <typename Char>
63MaybeHandle<String> URIUnescape::UnescapeSlow(Isolate* isolate,
64 Handle<String> string,
65 int start_index) {
66 bool one_byte = true;
67 int length = string->length();
68
69 int unescaped_length = 0;
70 {
71 DisallowHeapAllocation no_allocation;
72 Vector<const Char> vector = string->GetCharVector<Char>();
73 for (int i = start_index; i < length; unescaped_length++) {
74 int step;
75 if (UnescapeChar(vector, i, length, &step) >
76 String::kMaxOneByteCharCode) {
77 one_byte = false;
78 }
79 i += step;
80 }
81 }
82
83 DCHECK(start_index < length);
84 Handle<String> first_part =
85 isolate->factory()->NewProperSubString(string, 0, start_index);
86
87 int dest_position = 0;
88 Handle<String> second_part;
89 DCHECK(unescaped_length <= String::kMaxLength);
90 if (one_byte) {
91 Handle<SeqOneByteString> dest = isolate->factory()
92 ->NewRawOneByteString(unescaped_length)
93 .ToHandleChecked();
94 DisallowHeapAllocation no_allocation;
95 Vector<const Char> vector = string->GetCharVector<Char>();
96 for (int i = start_index; i < length; dest_position++) {
97 int step;
98 dest->SeqOneByteStringSet(dest_position,
99 UnescapeChar(vector, i, length, &step));
100 i += step;
101 }
102 second_part = dest;
103 } else {
104 Handle<SeqTwoByteString> dest = isolate->factory()
105 ->NewRawTwoByteString(unescaped_length)
106 .ToHandleChecked();
107 DisallowHeapAllocation no_allocation;
108 Vector<const Char> vector = string->GetCharVector<Char>();
109 for (int i = start_index; i < length; dest_position++) {
110 int step;
111 dest->SeqTwoByteStringSet(dest_position,
112 UnescapeChar(vector, i, length, &step));
113 i += step;
114 }
115 second_part = dest;
116 }
117 return isolate->factory()->NewConsString(first_part, second_part);
118}
119
120
121int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
122 if (character1 > 'f') return -1;
123 int hi = kHexValue[character1];
124 if (hi == -1) return -1;
125 if (character2 > 'f') return -1;
126 int lo = kHexValue[character2];
127 if (lo == -1) return -1;
128 return (hi << 4) + lo;
129}
130
131
132template <typename Char>
133int URIUnescape::UnescapeChar(Vector<const Char> vector, int i, int length,
134 int* step) {
135 uint16_t character = vector[i];
136 int32_t hi = 0;
137 int32_t lo = 0;
138 if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' &&
139 (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) != -1 &&
140 (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) != -1) {
141 *step = 6;
142 return (hi << 8) + lo;
143 } else if (character == '%' && i <= length - 3 &&
144 (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) != -1) {
145 *step = 3;
146 return lo;
147 } else {
148 *step = 1;
149 return character;
150 }
151}
152
153
154class URIEscape : public AllStatic {
155 public:
156 template <typename Char>
157 MUST_USE_RESULT static MaybeHandle<String> Escape(Isolate* isolate,
158 Handle<String> string);
159
160 private:
161 static const char kHexChars[17];
162 static const char kNotEscaped[256];
163
164 static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
165};
166
167
168const char URIEscape::kHexChars[] = "0123456789ABCDEF";
169
170
171// kNotEscaped is generated by the following:
172//
173// #!/bin/perl
174// for (my $i = 0; $i < 256; $i++) {
175// print "\n" if $i % 16 == 0;
176// my $c = chr($i);
177// my $escaped = 1;
178// $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
179// print $escaped ? "0, " : "1, ";
180// }
181
182const char URIEscape::kNotEscaped[] = {
183 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
184 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
185 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
186 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
187 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
188 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
191 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
192 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
193 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
194
195
196template <typename Char>
197MaybeHandle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) {
198 DCHECK(string->IsFlat());
199 int escaped_length = 0;
200 int length = string->length();
201
202 {
203 DisallowHeapAllocation no_allocation;
204 Vector<const Char> vector = string->GetCharVector<Char>();
205 for (int i = 0; i < length; i++) {
206 uint16_t c = vector[i];
207 if (c >= 256) {
208 escaped_length += 6;
209 } else if (IsNotEscaped(c)) {
210 escaped_length++;
211 } else {
212 escaped_length += 3;
213 }
214
215 // We don't allow strings that are longer than a maximal length.
216 DCHECK(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow.
217 if (escaped_length > String::kMaxLength) break; // Provoke exception.
218 }
219 }
220
221 // No length change implies no change. Return original string if no change.
222 if (escaped_length == length) return string;
223
224 Handle<SeqOneByteString> dest;
225 ASSIGN_RETURN_ON_EXCEPTION(
226 isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
227 String);
228 int dest_position = 0;
229
230 {
231 DisallowHeapAllocation no_allocation;
232 Vector<const Char> vector = string->GetCharVector<Char>();
233 for (int i = 0; i < length; i++) {
234 uint16_t c = vector[i];
235 if (c >= 256) {
236 dest->SeqOneByteStringSet(dest_position, '%');
237 dest->SeqOneByteStringSet(dest_position + 1, 'u');
238 dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c >> 12]);
239 dest->SeqOneByteStringSet(dest_position + 3, kHexChars[(c >> 8) & 0xf]);
240 dest->SeqOneByteStringSet(dest_position + 4, kHexChars[(c >> 4) & 0xf]);
241 dest->SeqOneByteStringSet(dest_position + 5, kHexChars[c & 0xf]);
242 dest_position += 6;
243 } else if (IsNotEscaped(c)) {
244 dest->SeqOneByteStringSet(dest_position, c);
245 dest_position++;
246 } else {
247 dest->SeqOneByteStringSet(dest_position, '%');
248 dest->SeqOneByteStringSet(dest_position + 1, kHexChars[c >> 4]);
249 dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c & 0xf]);
250 dest_position += 3;
251 }
252 }
253 }
254
255 return dest;
256}
257
258
259RUNTIME_FUNCTION(Runtime_URIEscape) {
260 HandleScope scope(isolate);
261 DCHECK(args.length() == 1);
262 CONVERT_ARG_HANDLE_CHECKED(String, source, 0);
263 Handle<String> string = String::Flatten(source);
264 DCHECK(string->IsFlat());
265 Handle<String> result;
266 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
267 isolate, result, string->IsOneByteRepresentationUnderneath()
268 ? URIEscape::Escape<uint8_t>(isolate, source)
269 : URIEscape::Escape<uc16>(isolate, source));
270 return *result;
271}
272
273
274RUNTIME_FUNCTION(Runtime_URIUnescape) {
275 HandleScope scope(isolate);
276 DCHECK(args.length() == 1);
277 CONVERT_ARG_HANDLE_CHECKED(String, source, 0);
278 Handle<String> string = String::Flatten(source);
279 DCHECK(string->IsFlat());
280 Handle<String> result;
281 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
282 isolate, result, string->IsOneByteRepresentationUnderneath()
283 ? URIUnescape::Unescape<uint8_t>(isolate, source)
284 : URIUnescape::Unescape<uc16>(isolate, source));
285 return *result;
286}
287}
288} // namespace v8::internal