blob: e64e9dcea7cb26d83affa4b61305f4351c2b6ed1 [file] [log] [blame]
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00005#include "src/runtime/runtime-utils.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -04006
7#include "src/arguments.h"
8#include "src/conversions.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00009#include "src/isolate-inl.h"
10#include "src/objects-inl.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -040011#include "src/string-search.h"
12#include "src/utils.h"
13
Emily Bernierd0a1eb72015-03-24 16:35:39 -040014namespace v8 {
15namespace internal {
16
17class URIUnescape : public AllStatic {
18 public:
19 template <typename Char>
20 MUST_USE_RESULT static MaybeHandle<String> Unescape(Isolate* isolate,
21 Handle<String> source);
22
23 private:
24 static const signed char kHexValue['g'];
25
26 template <typename Char>
27 MUST_USE_RESULT static MaybeHandle<String> UnescapeSlow(Isolate* isolate,
28 Handle<String> string,
29 int start_index);
30
31 static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
32
33 template <typename Char>
34 static INLINE(int UnescapeChar(Vector<const Char> vector, int i, int length,
35 int* step));
36};
37
38
39const signed char URIUnescape::kHexValue[] = {
40 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
41 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
42 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -0, 1, 2, 3, 4, 5,
43 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1,
44 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
45 -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15};
46
47
48template <typename Char>
49MaybeHandle<String> URIUnescape::Unescape(Isolate* isolate,
50 Handle<String> source) {
51 int index;
52 {
53 DisallowHeapAllocation no_allocation;
54 StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%"));
55 index = search.Search(source->GetCharVector<Char>(), 0);
56 if (index < 0) return source;
57 }
58 return UnescapeSlow<Char>(isolate, source, index);
59}
60
61
62template <typename Char>
63MaybeHandle<String> URIUnescape::UnescapeSlow(Isolate* isolate,
64 Handle<String> string,
65 int start_index) {
66 bool one_byte = true;
67 int length = string->length();
68
69 int unescaped_length = 0;
70 {
71 DisallowHeapAllocation no_allocation;
72 Vector<const Char> vector = string->GetCharVector<Char>();
73 for (int i = start_index; i < length; unescaped_length++) {
74 int step;
75 if (UnescapeChar(vector, i, length, &step) >
76 String::kMaxOneByteCharCode) {
77 one_byte = false;
78 }
79 i += step;
80 }
81 }
82
83 DCHECK(start_index < length);
84 Handle<String> first_part =
85 isolate->factory()->NewProperSubString(string, 0, start_index);
86
87 int dest_position = 0;
88 Handle<String> second_part;
89 DCHECK(unescaped_length <= String::kMaxLength);
90 if (one_byte) {
91 Handle<SeqOneByteString> dest = isolate->factory()
92 ->NewRawOneByteString(unescaped_length)
93 .ToHandleChecked();
94 DisallowHeapAllocation no_allocation;
95 Vector<const Char> vector = string->GetCharVector<Char>();
96 for (int i = start_index; i < length; dest_position++) {
97 int step;
98 dest->SeqOneByteStringSet(dest_position,
99 UnescapeChar(vector, i, length, &step));
100 i += step;
101 }
102 second_part = dest;
103 } else {
104 Handle<SeqTwoByteString> dest = isolate->factory()
105 ->NewRawTwoByteString(unescaped_length)
106 .ToHandleChecked();
107 DisallowHeapAllocation no_allocation;
108 Vector<const Char> vector = string->GetCharVector<Char>();
109 for (int i = start_index; i < length; dest_position++) {
110 int step;
111 dest->SeqTwoByteStringSet(dest_position,
112 UnescapeChar(vector, i, length, &step));
113 i += step;
114 }
115 second_part = dest;
116 }
117 return isolate->factory()->NewConsString(first_part, second_part);
118}
119
120
121int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
122 if (character1 > 'f') return -1;
123 int hi = kHexValue[character1];
124 if (hi == -1) return -1;
125 if (character2 > 'f') return -1;
126 int lo = kHexValue[character2];
127 if (lo == -1) return -1;
128 return (hi << 4) + lo;
129}
130
131
132template <typename Char>
133int URIUnescape::UnescapeChar(Vector<const Char> vector, int i, int length,
134 int* step) {
135 uint16_t character = vector[i];
136 int32_t hi = 0;
137 int32_t lo = 0;
138 if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' &&
139 (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) != -1 &&
140 (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) != -1) {
141 *step = 6;
142 return (hi << 8) + lo;
143 } else if (character == '%' && i <= length - 3 &&
144 (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) != -1) {
145 *step = 3;
146 return lo;
147 } else {
148 *step = 1;
149 return character;
150 }
151}
152
153
154class URIEscape : public AllStatic {
155 public:
156 template <typename Char>
157 MUST_USE_RESULT static MaybeHandle<String> Escape(Isolate* isolate,
158 Handle<String> string);
159
160 private:
161 static const char kHexChars[17];
162 static const char kNotEscaped[256];
163
164 static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
165};
166
167
168const char URIEscape::kHexChars[] = "0123456789ABCDEF";
169
170
171// kNotEscaped is generated by the following:
172//
173// #!/bin/perl
174// for (my $i = 0; $i < 256; $i++) {
175// print "\n" if $i % 16 == 0;
176// my $c = chr($i);
177// my $escaped = 1;
178// $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
179// print $escaped ? "0, " : "1, ";
180// }
181
182const char URIEscape::kNotEscaped[] = {
183 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
184 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
185 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
186 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
187 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
188 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
191 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
192 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
193 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
194
195
196template <typename Char>
197MaybeHandle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) {
198 DCHECK(string->IsFlat());
199 int escaped_length = 0;
200 int length = string->length();
201
202 {
203 DisallowHeapAllocation no_allocation;
204 Vector<const Char> vector = string->GetCharVector<Char>();
205 for (int i = 0; i < length; i++) {
206 uint16_t c = vector[i];
207 if (c >= 256) {
208 escaped_length += 6;
209 } else if (IsNotEscaped(c)) {
210 escaped_length++;
211 } else {
212 escaped_length += 3;
213 }
214
215 // We don't allow strings that are longer than a maximal length.
216 DCHECK(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow.
217 if (escaped_length > String::kMaxLength) break; // Provoke exception.
218 }
219 }
220
221 // No length change implies no change. Return original string if no change.
222 if (escaped_length == length) return string;
223
224 Handle<SeqOneByteString> dest;
225 ASSIGN_RETURN_ON_EXCEPTION(
226 isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
227 String);
228 int dest_position = 0;
229
230 {
231 DisallowHeapAllocation no_allocation;
232 Vector<const Char> vector = string->GetCharVector<Char>();
233 for (int i = 0; i < length; i++) {
234 uint16_t c = vector[i];
235 if (c >= 256) {
236 dest->SeqOneByteStringSet(dest_position, '%');
237 dest->SeqOneByteStringSet(dest_position + 1, 'u');
238 dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c >> 12]);
239 dest->SeqOneByteStringSet(dest_position + 3, kHexChars[(c >> 8) & 0xf]);
240 dest->SeqOneByteStringSet(dest_position + 4, kHexChars[(c >> 4) & 0xf]);
241 dest->SeqOneByteStringSet(dest_position + 5, kHexChars[c & 0xf]);
242 dest_position += 6;
243 } else if (IsNotEscaped(c)) {
244 dest->SeqOneByteStringSet(dest_position, c);
245 dest_position++;
246 } else {
247 dest->SeqOneByteStringSet(dest_position, '%');
248 dest->SeqOneByteStringSet(dest_position + 1, kHexChars[c >> 4]);
249 dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c & 0xf]);
250 dest_position += 3;
251 }
252 }
253 }
254
255 return dest;
256}
257
258
259RUNTIME_FUNCTION(Runtime_URIEscape) {
260 HandleScope scope(isolate);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000261 DCHECK_EQ(1, args.length());
262 CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
263 Handle<String> source;
264 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, source,
265 Object::ToString(isolate, input));
266 source = String::Flatten(source);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400267 Handle<String> result;
268 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000269 isolate, result, source->IsOneByteRepresentationUnderneath()
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400270 ? URIEscape::Escape<uint8_t>(isolate, source)
271 : URIEscape::Escape<uc16>(isolate, source));
272 return *result;
273}
274
275
276RUNTIME_FUNCTION(Runtime_URIUnescape) {
277 HandleScope scope(isolate);
278 DCHECK(args.length() == 1);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000279 CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
280 Handle<String> source;
281 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, source,
282 Object::ToString(isolate, input));
283 source = String::Flatten(source);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400284 Handle<String> result;
285 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000286 isolate, result, source->IsOneByteRepresentationUnderneath()
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400287 ? URIUnescape::Unescape<uint8_t>(isolate, source)
288 : URIUnescape::Unescape<uc16>(isolate, source));
289 return *result;
290}
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000291
292} // namespace internal
293} // namespace v8