// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_JSON_PARSER_H_
29#define V8_JSON_PARSER_H_
30
jkummerow@chromium.orge297f592011-06-08 10:05:15 +000031#include "v8.h"
32
33#include "char-predicates-inl.h"
jkummerow@chromium.orgddda9e82011-07-06 11:27:02 +000034#include "v8conversions.h"
jkummerow@chromium.orge297f592011-06-08 10:05:15 +000035#include "messages.h"
36#include "spaces-inl.h"
danno@chromium.org40cb8782011-05-25 07:58:50 +000037#include "token.h"
38
39namespace v8 {
40namespace internal {
41
42// A simple json parser.
jkummerow@chromium.orge297f592011-06-08 10:05:15 +000043template <bool seq_ascii>
danno@chromium.org40cb8782011-05-25 07:58:50 +000044class JsonParser BASE_EMBEDDED {
45 public:
mmassi@chromium.org7028c052012-06-13 11:51:58 +000046 static Handle<Object> Parse(Handle<String> source, Zone* zone) {
47 return JsonParser().ParseJson(source, zone);
danno@chromium.org40cb8782011-05-25 07:58:50 +000048 }
49
50 static const int kEndOfString = -1;
51
52 private:
53 // Parse a string containing a single JSON value.
mmassi@chromium.org7028c052012-06-13 11:51:58 +000054 Handle<Object> ParseJson(Handle<String> source, Zone* zone);
danno@chromium.org40cb8782011-05-25 07:58:50 +000055
56 inline void Advance() {
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +000057 position_++;
vegorov@chromium.org3cf47312011-06-29 13:20:01 +000058 if (position_ >= source_length_) {
danno@chromium.org40cb8782011-05-25 07:58:50 +000059 c0_ = kEndOfString;
jkummerow@chromium.orge297f592011-06-08 10:05:15 +000060 } else if (seq_ascii) {
yangguo@chromium.orgfb377212012-11-16 14:43:43 +000061 c0_ = seq_source_->SeqOneByteStringGet(position_);
danno@chromium.org40cb8782011-05-25 07:58:50 +000062 } else {
danno@chromium.org40cb8782011-05-25 07:58:50 +000063 c0_ = source_->Get(position_);
64 }
65 }
66
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +000067 // The JSON lexical grammar is specified in the ECMAScript 5 standard,
68 // section 15.12.1.1. The only allowed whitespace characters between tokens
69 // are tab, carriage-return, newline and space.
danno@chromium.org40cb8782011-05-25 07:58:50 +000070
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +000071 inline void AdvanceSkipWhitespace() {
72 do {
73 Advance();
verwaest@chromium.org33e09c82012-10-10 17:07:22 +000074 } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r');
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +000075 }
danno@chromium.org40cb8782011-05-25 07:58:50 +000076
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +000077 inline void SkipWhitespace() {
verwaest@chromium.org33e09c82012-10-10 17:07:22 +000078 while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') {
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +000079 Advance();
80 }
81 }
82
83 inline uc32 AdvanceGetChar() {
84 Advance();
85 return c0_;
86 }
87
88 // Checks that current charater is c.
89 // If so, then consume c and skip whitespace.
90 inline bool MatchSkipWhiteSpace(uc32 c) {
91 if (c0_ == c) {
92 AdvanceSkipWhitespace();
93 return true;
94 }
95 return false;
96 }
danno@chromium.org40cb8782011-05-25 07:58:50 +000097
98 // A JSON string (production JSONString) is subset of valid JavaScript string
99 // literals. The string must only be double-quoted (not single-quoted), and
100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +0000102 Handle<String> ParseJsonString() {
103 return ScanJsonString<false>();
104 }
yangguo@chromium.org4a9f6552013-03-04 14:46:33 +0000105 Handle<String> ParseJsonInternalizedString() {
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +0000106 return ScanJsonString<true>();
107 }
yangguo@chromium.org4a9f6552013-03-04 14:46:33 +0000108 template <bool is_internalized>
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +0000109 Handle<String> ScanJsonString();
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000110 // Creates a new string and copies prefix[start..end] into the beginning
111 // of it. Then scans the rest of the string, adding characters after the
112 // prefix. Called by ScanJsonString when reaching a '\' or non-ASCII char.
113 template <typename StringType, typename SinkChar>
114 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end);
danno@chromium.org40cb8782011-05-25 07:58:50 +0000115
116 // A JSON number (production JSONNumber) is a subset of the valid JavaScript
117 // decimal number literals.
118 // It includes an optional minus sign, must have at least one
119 // digit before and after a decimal point, may not have prefixed zeros (unless
120 // the integer part is zero), and may include an exponent part (e.g., "e-10").
121 // Hexadecimal and octal numbers are not allowed.
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +0000122 Handle<Object> ParseJsonNumber();
danno@chromium.org40cb8782011-05-25 07:58:50 +0000123
124 // Parse a single JSON value from input (grammar production JSONValue).
125 // A JSON value is either a (double-quoted) string literal, a number literal,
126 // one of "true", "false", or "null", or an object or array literal.
127 Handle<Object> ParseJsonValue();
128
129 // Parse a JSON object literal (grammar production JSONObject).
130 // An object literal is a squiggly-braced and comma separated sequence
131 // (possibly empty) of key/value pairs, where the key is a JSON string
132 // literal, the value is a JSON value, and the two are separated by a colon.
ulan@chromium.org2efb9002012-01-19 15:36:35 +0000133 // A JSON array doesn't allow numbers and identifiers as keys, like a
danno@chromium.org40cb8782011-05-25 07:58:50 +0000134 // JavaScript array.
135 Handle<Object> ParseJsonObject();
136
137 // Parses a JSON array literal (grammar production JSONArray). An array
138 // literal is a square-bracketed and comma separated sequence (possibly empty)
139 // of JSON values.
140 // A JSON array doesn't allow leaving out values from the sequence, nor does
141 // it allow a terminal comma, like a JavaScript array does.
142 Handle<Object> ParseJsonArray();
143
144
145 // Mark that a parsing error has happened at the current token, and
146 // return a null handle. Primarily for readability.
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +0000147 inline Handle<Object> ReportUnexpectedCharacter() {
148 return Handle<Object>::null();
149 }
danno@chromium.org40cb8782011-05-25 07:58:50 +0000150
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +0000151 inline Isolate* isolate() { return isolate_; }
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000152 inline Factory* factory() { return factory_; }
153 inline Handle<JSFunction> object_constructor() { return object_constructor_; }
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000154 inline Zone* zone() const { return zone_; }
danno@chromium.org40cb8782011-05-25 07:58:50 +0000155
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000156 static const int kInitialSpecialStringLength = 1024;
danno@chromium.org72204d52012-10-31 10:02:10 +0000157 static const int kPretenureTreshold = 100 * 1024;
danno@chromium.org40cb8782011-05-25 07:58:50 +0000158
159
160 private:
161 Handle<String> source_;
162 int source_length_;
yangguo@chromium.orgfb377212012-11-16 14:43:43 +0000163 Handle<SeqOneByteString> seq_source_;
danno@chromium.org40cb8782011-05-25 07:58:50 +0000164
danno@chromium.org72204d52012-10-31 10:02:10 +0000165 PretenureFlag pretenure_;
danno@chromium.org40cb8782011-05-25 07:58:50 +0000166 Isolate* isolate_;
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000167 Factory* factory_;
168 Handle<JSFunction> object_constructor_;
danno@chromium.org40cb8782011-05-25 07:58:50 +0000169 uc32 c0_;
170 int position_;
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000171 Zone* zone_;
danno@chromium.org40cb8782011-05-25 07:58:50 +0000172};
173
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000174template <bool seq_ascii>
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000175Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source,
176 Zone* zone) {
erik.corry@gmail.comc3b670f2011-10-05 21:44:48 +0000177 isolate_ = source->map()->GetHeap()->isolate();
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000178 factory_ = isolate_->factory();
verwaest@chromium.orge4ee6de2012-11-06 12:13:00 +0000179 object_constructor_ = Handle<JSFunction>(
180 isolate()->native_context()->object_function(), isolate());
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000181 zone_ = zone;
ricow@chromium.org2c99e282011-07-28 09:15:17 +0000182 FlattenString(source);
183 source_ = source;
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000184 source_length_ = source_->length();
danno@chromium.org72204d52012-10-31 10:02:10 +0000185 pretenure_ = (source_length_ >= kPretenureTreshold) ? TENURED : NOT_TENURED;
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000186
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000187 // Optimized fast case where we only have ASCII characters.
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000188 if (seq_ascii) {
yangguo@chromium.orgfb377212012-11-16 14:43:43 +0000189 seq_source_ = Handle<SeqOneByteString>::cast(source_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000190 }
191
192 // Set initial position right before the string.
193 position_ = -1;
ulan@chromium.org2efb9002012-01-19 15:36:35 +0000194 // Advance to the first character (possibly EOS)
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000195 AdvanceSkipWhitespace();
196 Handle<Object> result = ParseJsonValue();
197 if (result.is_null() || c0_ != kEndOfString) {
danno@chromium.org72204d52012-10-31 10:02:10 +0000198 // Some exception (for example stack overflow) is already pending.
199 if (isolate_->has_pending_exception()) return Handle<Object>::null();
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000200
danno@chromium.org72204d52012-10-31 10:02:10 +0000201 // Parse failed. Current character is the unexpected token.
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000202 const char* message;
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000203 Factory* factory = this->factory();
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000204 Handle<JSArray> array;
205
206 switch (c0_) {
207 case kEndOfString:
208 message = "unexpected_eos";
209 array = factory->NewJSArray(0);
210 break;
211 case '-':
212 case '0':
213 case '1':
214 case '2':
215 case '3':
216 case '4':
217 case '5':
218 case '6':
219 case '7':
220 case '8':
221 case '9':
222 message = "unexpected_token_number";
223 array = factory->NewJSArray(0);
224 break;
225 case '"':
226 message = "unexpected_token_string";
227 array = factory->NewJSArray(0);
228 break;
229 default:
230 message = "unexpected_token";
ulan@chromium.org09d7ab52013-02-25 15:50:35 +0000231 Handle<Object> name =
232 LookupSingleCharacterStringFromCode(isolate_, c0_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000233 Handle<FixedArray> element = factory->NewFixedArray(1);
234 element->set(0, *name);
235 array = factory->NewJSArrayWithElements(element);
236 break;
237 }
238
239 MessageLocation location(factory->NewScript(source),
240 position_,
241 position_ + 1);
242 Handle<Object> result = factory->NewSyntaxError(message, array);
243 isolate()->Throw(*result, &location);
244 return Handle<Object>::null();
245 }
246 return result;
247}
248
249
250// Parse any JSON value.
251template <bool seq_ascii>
252Handle<Object> JsonParser<seq_ascii>::ParseJsonValue() {
danno@chromium.org72204d52012-10-31 10:02:10 +0000253 StackLimitCheck stack_check(isolate_);
254 if (stack_check.HasOverflowed()) {
255 isolate_->StackOverflow();
256 return Handle<Object>::null();
257 }
258
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000259 if (c0_ == '"') return ParseJsonString();
260 if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber();
261 if (c0_ == '{') return ParseJsonObject();
262 if (c0_ == '[') return ParseJsonArray();
263 if (c0_ == 'f') {
264 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
265 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
266 AdvanceSkipWhitespace();
267 return factory()->false_value();
268 }
269 return ReportUnexpectedCharacter();
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000270 }
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000271 if (c0_ == 't') {
272 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
273 AdvanceGetChar() == 'e') {
274 AdvanceSkipWhitespace();
275 return factory()->true_value();
276 }
277 return ReportUnexpectedCharacter();
278 }
279 if (c0_ == 'n') {
280 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
281 AdvanceGetChar() == 'l') {
282 AdvanceSkipWhitespace();
283 return factory()->null_value();
284 }
285 return ReportUnexpectedCharacter();
286 }
287 return ReportUnexpectedCharacter();
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000288}
289
290
291// Parse a JSON object. Position must be right at '{'.
292template <bool seq_ascii>
293Handle<Object> JsonParser<seq_ascii>::ParseJsonObject() {
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000294 Handle<JSObject> json_object =
danno@chromium.org72204d52012-10-31 10:02:10 +0000295 factory()->NewJSObject(object_constructor(), pretenure_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000296 ASSERT_EQ(c0_, '{');
297
298 AdvanceSkipWhitespace();
299 if (c0_ != '}') {
300 do {
301 if (c0_ != '"') return ReportUnexpectedCharacter();
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000302
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000303 int start_position = position_;
304 Advance();
305
306 uint32_t index = 0;
danno@chromium.org72204d52012-10-31 10:02:10 +0000307 if (c0_ >= '0' && c0_ <= '9') {
308 // Maybe an array index, try to parse it.
309 if (c0_ == '0') {
310 // With a leading zero, the string has to be "0" only to be an index.
311 Advance();
312 } else {
313 do {
314 int d = c0_ - '0';
315 if (index > 429496729U - ((d > 5) ? 1 : 0)) break;
316 index = (index * 10) + d;
317 Advance();
318 } while (c0_ >= '0' && c0_ <= '9');
319 }
320
321 if (c0_ == '"') {
322 // Successfully parsed index, parse and store element.
323 AdvanceSkipWhitespace();
324
325 if (c0_ != ':') return ReportUnexpectedCharacter();
326 AdvanceSkipWhitespace();
327 Handle<Object> value = ParseJsonValue();
328 if (value.is_null()) return ReportUnexpectedCharacter();
329
330 JSObject::SetOwnElement(json_object, index, value, kNonStrictMode);
331 continue;
332 }
333 // Not an index, fallback to the slow path.
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000334 }
335
danno@chromium.org72204d52012-10-31 10:02:10 +0000336 position_ = start_position;
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000337#ifdef DEBUG
danno@chromium.org72204d52012-10-31 10:02:10 +0000338 c0_ = '"';
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000339#endif
340
yangguo@chromium.org4a9f6552013-03-04 14:46:33 +0000341 Handle<String> key = ParseJsonInternalizedString();
danno@chromium.org72204d52012-10-31 10:02:10 +0000342 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000343
danno@chromium.org72204d52012-10-31 10:02:10 +0000344 AdvanceSkipWhitespace();
345 Handle<Object> value = ParseJsonValue();
346 if (value.is_null()) return ReportUnexpectedCharacter();
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000347
mstarzinger@chromium.org71fc3462013-02-27 09:34:27 +0000348 if (JSObject::TryTransitionToField(json_object, key)) {
349 int index = json_object->LastAddedFieldIndex();
350 json_object->FastPropertyAtPut(index, *value);
danno@chromium.org72204d52012-10-31 10:02:10 +0000351 } else {
mstarzinger@chromium.org71fc3462013-02-27 09:34:27 +0000352 JSObject::SetLocalPropertyIgnoreAttributes(
353 json_object, key, value, NONE);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000354 }
355 } while (MatchSkipWhiteSpace(','));
356 if (c0_ != '}') {
357 return ReportUnexpectedCharacter();
358 }
359 }
360 AdvanceSkipWhitespace();
361 return json_object;
362}
363
364// Parse a JSON array. Position must be right at '['.
365template <bool seq_ascii>
366Handle<Object> JsonParser<seq_ascii>::ParseJsonArray() {
yangguo@chromium.org5a11aaf2012-06-20 11:29:00 +0000367 ZoneScope zone_scope(zone(), DELETE_ON_EXIT);
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000368 ZoneList<Handle<Object> > elements(4, zone());
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000369 ASSERT_EQ(c0_, '[');
370
371 AdvanceSkipWhitespace();
372 if (c0_ != ']') {
373 do {
374 Handle<Object> element = ParseJsonValue();
375 if (element.is_null()) return ReportUnexpectedCharacter();
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000376 elements.Add(element, zone());
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000377 } while (MatchSkipWhiteSpace(','));
378 if (c0_ != ']') {
379 return ReportUnexpectedCharacter();
380 }
381 }
382 AdvanceSkipWhitespace();
383 // Allocate a fixed array with all the elements.
384 Handle<FixedArray> fast_elements =
danno@chromium.org72204d52012-10-31 10:02:10 +0000385 factory()->NewFixedArray(elements.length(), pretenure_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000386 for (int i = 0, n = elements.length(); i < n; i++) {
387 fast_elements->set(i, *elements[i]);
388 }
danno@chromium.org72204d52012-10-31 10:02:10 +0000389 return factory()->NewJSArrayWithElements(
390 fast_elements, FAST_ELEMENTS, pretenure_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000391}
392
393
394template <bool seq_ascii>
395Handle<Object> JsonParser<seq_ascii>::ParseJsonNumber() {
396 bool negative = false;
ager@chromium.org04921a82011-06-27 13:21:41 +0000397 int beg_pos = position_;
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000398 if (c0_ == '-') {
399 Advance();
400 negative = true;
401 }
402 if (c0_ == '0') {
403 Advance();
404 // Prefix zero is only allowed if it's the only digit before
405 // a decimal point or exponent.
406 if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter();
407 } else {
408 int i = 0;
409 int digits = 0;
410 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
411 do {
412 i = i * 10 + c0_ - '0';
413 digits++;
414 Advance();
415 } while (c0_ >= '0' && c0_ <= '9');
416 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000417 SkipWhitespace();
ager@chromium.org04921a82011-06-27 13:21:41 +0000418 return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate());
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000419 }
420 }
421 if (c0_ == '.') {
422 Advance();
423 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
424 do {
425 Advance();
426 } while (c0_ >= '0' && c0_ <= '9');
427 }
428 if (AsciiAlphaToLower(c0_) == 'e') {
429 Advance();
430 if (c0_ == '-' || c0_ == '+') Advance();
431 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
432 do {
433 Advance();
434 } while (c0_ >= '0' && c0_ <= '9');
435 }
ager@chromium.org04921a82011-06-27 13:21:41 +0000436 int length = position_ - beg_pos;
437 double number;
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000438 if (seq_ascii) {
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000439 Vector<const uint8_t> chars(seq_source_->GetChars() + beg_pos, length);
ager@chromium.org04921a82011-06-27 13:21:41 +0000440 number = StringToDouble(isolate()->unicode_cache(),
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000441 Vector<const char>::cast(chars),
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000442 NO_FLAGS, // Hex, octal or trailing junk.
443 OS::nan_value());
444 } else {
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000445 Vector<uint8_t> buffer = Vector<uint8_t>::New(length);
ager@chromium.org04921a82011-06-27 13:21:41 +0000446 String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000447 Vector<const uint8_t> result =
448 Vector<const uint8_t>(buffer.start(), length);
ager@chromium.org04921a82011-06-27 13:21:41 +0000449 number = StringToDouble(isolate()->unicode_cache(),
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000450 // TODO(dcarney): Convert StringToDouble to uint_t.
451 Vector<const char>::cast(result),
452 NO_FLAGS, // Hex, octal or trailing junk.
453 0.0);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000454 buffer.Dispose();
455 }
456 SkipWhitespace();
danno@chromium.org72204d52012-10-31 10:02:10 +0000457 return factory()->NewNumber(number, pretenure_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000458}
459
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000460
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000461template <typename StringType>
462inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);
463
464template <>
465inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
466 seq_str->SeqTwoByteStringSet(i, c);
467}
468
469template <>
yangguo@chromium.orgfb377212012-11-16 14:43:43 +0000470inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) {
471 seq_str->SeqOneByteStringSet(i, c);
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000472}
473
474template <typename StringType>
danno@chromium.org72204d52012-10-31 10:02:10 +0000475inline Handle<StringType> NewRawString(Factory* factory,
476 int length,
477 PretenureFlag pretenure);
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000478
479template <>
danno@chromium.org72204d52012-10-31 10:02:10 +0000480inline Handle<SeqTwoByteString> NewRawString(Factory* factory,
481 int length,
482 PretenureFlag pretenure) {
483 return factory->NewRawTwoByteString(length, pretenure);
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000484}
485
486template <>
yangguo@chromium.orgfb377212012-11-16 14:43:43 +0000487inline Handle<SeqOneByteString> NewRawString(Factory* factory,
danno@chromium.org72204d52012-10-31 10:02:10 +0000488 int length,
489 PretenureFlag pretenure) {
yangguo@chromium.orgfb377212012-11-16 14:43:43 +0000490 return factory->NewRawOneByteString(length, pretenure);
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000491}
492
493
494// Scans the rest of a JSON string starting from position_ and writes
495// prefix[start..end] along with the scanned characters into a
496// sequential string of type StringType.
497template <bool seq_ascii>
498template <typename StringType, typename SinkChar>
499Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(
500 Handle<String> prefix, int start, int end) {
501 int count = end - start;
502 int max_length = count + source_length_ - position_;
503 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count));
danno@chromium.org72204d52012-10-31 10:02:10 +0000504 Handle<StringType> seq_str =
505 NewRawString<StringType>(factory(), length, pretenure_);
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000506 // Copy prefix into seq_str.
507 SinkChar* dest = seq_str->GetChars();
508 String::WriteToFlat(*prefix, dest, start, end);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000509
510 while (c0_ != '"') {
kmillikin@chromium.org7c2628c2011-08-10 11:27:35 +0000511 // Check for control character (0x00-0x1f) or unterminated string (<0).
512 if (c0_ < 0x20) return Handle<String>::null();
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000513 if (count >= length) {
514 // We need to create a longer sequential string for the result.
515 return SlowScanJsonString<StringType, SinkChar>(seq_str, 0, count);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000516 }
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000517 if (c0_ != '\\') {
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000518 // If the sink can contain UC16 characters, or source_ contains only
519 // ASCII characters, there's no need to test whether we can store the
520 // character. Otherwise check whether the UC16 source character can fit
521 // in the ASCII sink.
522 if (sizeof(SinkChar) == kUC16Size ||
523 seq_ascii ||
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000524 c0_ <= String::kMaxOneByteCharCode) {
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000525 SeqStringSet(seq_str, count++, c0_);
526 Advance();
527 } else {
yangguo@chromium.orgfb377212012-11-16 14:43:43 +0000528 // StringType is SeqOneByteString and we just read a non-ASCII char.
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000529 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, 0, count);
530 }
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000531 } else {
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000532 Advance(); // Advance past the \.
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000533 switch (c0_) {
534 case '"':
535 case '\\':
536 case '/':
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000537 SeqStringSet(seq_str, count++, c0_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000538 break;
539 case 'b':
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000540 SeqStringSet(seq_str, count++, '\x08');
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000541 break;
542 case 'f':
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000543 SeqStringSet(seq_str, count++, '\x0c');
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000544 break;
545 case 'n':
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000546 SeqStringSet(seq_str, count++, '\x0a');
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000547 break;
548 case 'r':
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000549 SeqStringSet(seq_str, count++, '\x0d');
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000550 break;
551 case 't':
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000552 SeqStringSet(seq_str, count++, '\x09');
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000553 break;
554 case 'u': {
555 uc32 value = 0;
556 for (int i = 0; i < 4; i++) {
557 Advance();
558 int digit = HexValue(c0_);
559 if (digit < 0) {
560 return Handle<String>::null();
561 }
562 value = value * 16 + digit;
563 }
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000564 if (sizeof(SinkChar) == kUC16Size ||
565 value <= String::kMaxOneByteCharCode) {
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000566 SeqStringSet(seq_str, count++, value);
567 break;
568 } else {
yangguo@chromium.orgfb377212012-11-16 14:43:43 +0000569 // StringType is SeqOneByteString and we just read a non-ASCII char.
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000570 position_ -= 6; // Rewind position_ to \ in \uxxxx.
571 Advance();
572 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str,
573 0,
574 count);
575 }
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000576 }
577 default:
578 return Handle<String>::null();
579 }
580 Advance();
581 }
582 }
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000583 // Shrink seq_string length to count.
584 if (isolate()->heap()->InNewSpace(*seq_str)) {
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000585 isolate()->heap()->new_space()->
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000586 template ShrinkStringAtAllocationBoundary<StringType>(
587 *seq_str, count);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000588 } else {
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000589 int string_size = StringType::SizeFor(count);
590 int allocated_string_size = StringType::SizeFor(length);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000591 int delta = allocated_string_size - string_size;
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000592 Address start_filler_object = seq_str->address() + string_size;
593 seq_str->set_length(count);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000594 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
595 }
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000596 ASSERT_EQ('"', c0_);
597 // Advance past the last '"'.
598 AdvanceSkipWhitespace();
599 return seq_str;
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000600}
601
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000602
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000603template <bool seq_ascii>
yangguo@chromium.org4a9f6552013-03-04 14:46:33 +0000604template <bool is_internalized>
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000605Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
606 ASSERT_EQ('"', c0_);
607 Advance();
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000608 if (c0_ == '"') {
609 AdvanceSkipWhitespace();
verwaest@chromium.orge4ee6de2012-11-06 12:13:00 +0000610 return factory()->empty_string();
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000611 }
verwaest@chromium.org33e09c82012-10-10 17:07:22 +0000612
yangguo@chromium.org4a9f6552013-03-04 14:46:33 +0000613 if (seq_ascii && is_internalized) {
614 // Fast path for existing internalized strings. If the the string being
615 // parsed is not a known internalized string, contains backslashes or
616 // unexpectedly reaches the end of string, return with an empty handle.
verwaest@chromium.org33e09c82012-10-10 17:07:22 +0000617 uint32_t running_hash = isolate()->heap()->HashSeed();
618 int position = position_;
619 uc32 c0 = c0_;
620 do {
621 if (c0 == '\\') {
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000622 c0_ = c0;
623 int beg_pos = position_;
624 position_ = position;
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000625 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_,
626 beg_pos,
627 position_);
verwaest@chromium.org33e09c82012-10-10 17:07:22 +0000628 }
rossberg@chromium.org89e18f52012-10-22 13:09:53 +0000629 if (c0 < 0x20) return Handle<String>::null();
yangguo@chromium.orga6bbcc82012-12-21 12:35:02 +0000630 if (static_cast<uint32_t>(c0) >
631 unibrow::Utf16::kMaxNonSurrogateCharCode) {
632 running_hash =
633 StringHasher::AddCharacterCore(running_hash,
634 unibrow::Utf16::LeadSurrogate(c0));
635 running_hash =
636 StringHasher::AddCharacterCore(running_hash,
637 unibrow::Utf16::TrailSurrogate(c0));
638 } else {
639 running_hash = StringHasher::AddCharacterCore(running_hash, c0);
640 }
verwaest@chromium.org33e09c82012-10-10 17:07:22 +0000641 position++;
642 if (position >= source_length_) return Handle<String>::null();
yangguo@chromium.orgfb377212012-11-16 14:43:43 +0000643 c0 = seq_source_->SeqOneByteStringGet(position);
verwaest@chromium.org33e09c82012-10-10 17:07:22 +0000644 } while (c0 != '"');
645 int length = position - position_;
646 uint32_t hash = (length <= String::kMaxHashCalcLength)
647 ? StringHasher::GetHashCore(running_hash) : length;
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000648 Vector<const uint8_t> string_vector(
verwaest@chromium.org33e09c82012-10-10 17:07:22 +0000649 seq_source_->GetChars() + position_, length);
yangguo@chromium.org4a9f6552013-03-04 14:46:33 +0000650 StringTable* string_table = isolate()->heap()->string_table();
651 uint32_t capacity = string_table->Capacity();
652 uint32_t entry = StringTable::FirstProbe(hash, capacity);
verwaest@chromium.org33e09c82012-10-10 17:07:22 +0000653 uint32_t count = 1;
654 while (true) {
yangguo@chromium.org4a9f6552013-03-04 14:46:33 +0000655 Object* element = string_table->KeyAt(entry);
danno@chromium.org72204d52012-10-31 10:02:10 +0000656 if (element == isolate()->heap()->undefined_value()) {
verwaest@chromium.org33e09c82012-10-10 17:07:22 +0000657 // Lookup failure.
658 break;
659 }
danno@chromium.org72204d52012-10-31 10:02:10 +0000660 if (element != isolate()->heap()->the_hole_value() &&
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000661 String::cast(element)->IsOneByteEqualTo(string_vector)) {
verwaest@chromium.org33e09c82012-10-10 17:07:22 +0000662 // Lookup success, update the current position.
663 position_ = position;
664 // Advance past the last '"'.
665 AdvanceSkipWhitespace();
verwaest@chromium.orge4ee6de2012-11-06 12:13:00 +0000666 return Handle<String>(String::cast(element), isolate());
verwaest@chromium.org33e09c82012-10-10 17:07:22 +0000667 }
yangguo@chromium.org4a9f6552013-03-04 14:46:33 +0000668 entry = StringTable::NextProbe(entry, count++, capacity);
verwaest@chromium.org33e09c82012-10-10 17:07:22 +0000669 }
670 }
671
ager@chromium.org04921a82011-06-27 13:21:41 +0000672 int beg_pos = position_;
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000673 // Fast case for ASCII only without escape characters.
674 do {
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000675 // Check for control character (0x00-0x1f) or unterminated string (<0).
676 if (c0_ < 0x20) return Handle<String>::null();
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000677 if (c0_ != '\\') {
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000678 if (seq_ascii || c0_ <= String::kMaxOneByteCharCode) {
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000679 Advance();
680 } else {
681 return SlowScanJsonString<SeqTwoByteString, uc16>(source_,
682 beg_pos,
683 position_);
684 }
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000685 } else {
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000686 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_,
687 beg_pos,
688 position_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000689 }
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000690 } while (c0_ != '"');
691 int length = position_ - beg_pos;
692 Handle<String> result;
yangguo@chromium.org4a9f6552013-03-04 14:46:33 +0000693 if (seq_ascii && is_internalized) {
694 result = factory()->InternalizeOneByteString(seq_source_, beg_pos, length);
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000695 } else {
yangguo@chromium.orgfb377212012-11-16 14:43:43 +0000696 result = factory()->NewRawOneByteString(length, pretenure_);
jkummerow@chromium.org59297c72013-01-09 16:32:23 +0000697 uint8_t* dest = SeqOneByteString::cast(*result)->GetChars();
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000698 String::WriteToFlat(*source_, dest, beg_pos, position_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000699 }
700 ASSERT_EQ('"', c0_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000701 // Advance past the last '"'.
702 AdvanceSkipWhitespace();
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000703 return result;
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000704}
705
danno@chromium.org40cb8782011-05-25 07:58:50 +0000706} } // namespace v8::internal
707
708#endif // V8_JSON_PARSER_H_