blob: ba05230ceeea7655229be8cf94696cab21e857e5 [file] [log] [blame]
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_JSON_PARSER_H_
29#define V8_JSON_PARSER_H_
30
jkummerow@chromium.orge297f592011-06-08 10:05:15 +000031#include "v8.h"
32
33#include "char-predicates-inl.h"
jkummerow@chromium.orgddda9e82011-07-06 11:27:02 +000034#include "v8conversions.h"
jkummerow@chromium.orge297f592011-06-08 10:05:15 +000035#include "messages.h"
36#include "spaces-inl.h"
danno@chromium.org40cb8782011-05-25 07:58:50 +000037#include "token.h"
38
39namespace v8 {
40namespace internal {
41
42// A simple json parser.
jkummerow@chromium.orge297f592011-06-08 10:05:15 +000043template <bool seq_ascii>
danno@chromium.org40cb8782011-05-25 07:58:50 +000044class JsonParser BASE_EMBEDDED {
45 public:
mmassi@chromium.org7028c052012-06-13 11:51:58 +000046 static Handle<Object> Parse(Handle<String> source, Zone* zone) {
47 return JsonParser().ParseJson(source, zone);
danno@chromium.org40cb8782011-05-25 07:58:50 +000048 }
49
50 static const int kEndOfString = -1;
51
52 private:
53 // Parse a string containing a single JSON value.
mmassi@chromium.org7028c052012-06-13 11:51:58 +000054 Handle<Object> ParseJson(Handle<String> source, Zone* zone);
danno@chromium.org40cb8782011-05-25 07:58:50 +000055
56 inline void Advance() {
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +000057 position_++;
vegorov@chromium.org3cf47312011-06-29 13:20:01 +000058 if (position_ >= source_length_) {
danno@chromium.org40cb8782011-05-25 07:58:50 +000059 c0_ = kEndOfString;
jkummerow@chromium.orge297f592011-06-08 10:05:15 +000060 } else if (seq_ascii) {
danno@chromium.org40cb8782011-05-25 07:58:50 +000061 c0_ = seq_source_->SeqAsciiStringGet(position_);
62 } else {
danno@chromium.org40cb8782011-05-25 07:58:50 +000063 c0_ = source_->Get(position_);
64 }
65 }
66
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +000067 // The JSON lexical grammar is specified in the ECMAScript 5 standard,
68 // section 15.12.1.1. The only allowed whitespace characters between tokens
69 // are tab, carriage-return, newline and space.
danno@chromium.org40cb8782011-05-25 07:58:50 +000070
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +000071 inline void AdvanceSkipWhitespace() {
72 do {
73 Advance();
74 } while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' ');
75 }
danno@chromium.org40cb8782011-05-25 07:58:50 +000076
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +000077 inline void SkipWhitespace() {
78 while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' ') {
79 Advance();
80 }
81 }
82
83 inline uc32 AdvanceGetChar() {
84 Advance();
85 return c0_;
86 }
87
88 // Checks that current charater is c.
89 // If so, then consume c and skip whitespace.
90 inline bool MatchSkipWhiteSpace(uc32 c) {
91 if (c0_ == c) {
92 AdvanceSkipWhitespace();
93 return true;
94 }
95 return false;
96 }
danno@chromium.org40cb8782011-05-25 07:58:50 +000097
98 // A JSON string (production JSONString) is subset of valid JavaScript string
99 // literals. The string must only be double-quoted (not single-quoted), and
100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +0000102 Handle<String> ParseJsonString() {
103 return ScanJsonString<false>();
104 }
105 Handle<String> ParseJsonSymbol() {
106 return ScanJsonString<true>();
107 }
108 template <bool is_symbol>
109 Handle<String> ScanJsonString();
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000110 // Creates a new string and copies prefix[start..end] into the beginning
111 // of it. Then scans the rest of the string, adding characters after the
112 // prefix. Called by ScanJsonString when reaching a '\' or non-ASCII char.
113 template <typename StringType, typename SinkChar>
114 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end);
danno@chromium.org40cb8782011-05-25 07:58:50 +0000115
116 // A JSON number (production JSONNumber) is a subset of the valid JavaScript
117 // decimal number literals.
118 // It includes an optional minus sign, must have at least one
119 // digit before and after a decimal point, may not have prefixed zeros (unless
120 // the integer part is zero), and may include an exponent part (e.g., "e-10").
121 // Hexadecimal and octal numbers are not allowed.
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +0000122 Handle<Object> ParseJsonNumber();
danno@chromium.org40cb8782011-05-25 07:58:50 +0000123
124 // Parse a single JSON value from input (grammar production JSONValue).
125 // A JSON value is either a (double-quoted) string literal, a number literal,
126 // one of "true", "false", or "null", or an object or array literal.
127 Handle<Object> ParseJsonValue();
128
129 // Parse a JSON object literal (grammar production JSONObject).
130 // An object literal is a squiggly-braced and comma separated sequence
131 // (possibly empty) of key/value pairs, where the key is a JSON string
132 // literal, the value is a JSON value, and the two are separated by a colon.
ulan@chromium.org2efb9002012-01-19 15:36:35 +0000133 // A JSON array doesn't allow numbers and identifiers as keys, like a
danno@chromium.org40cb8782011-05-25 07:58:50 +0000134 // JavaScript array.
135 Handle<Object> ParseJsonObject();
136
137 // Parses a JSON array literal (grammar production JSONArray). An array
138 // literal is a square-bracketed and comma separated sequence (possibly empty)
139 // of JSON values.
140 // A JSON array doesn't allow leaving out values from the sequence, nor does
141 // it allow a terminal comma, like a JavaScript array does.
142 Handle<Object> ParseJsonArray();
143
144
145 // Mark that a parsing error has happened at the current token, and
146 // return a null handle. Primarily for readability.
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +0000147 inline Handle<Object> ReportUnexpectedCharacter() {
148 return Handle<Object>::null();
149 }
danno@chromium.org40cb8782011-05-25 07:58:50 +0000150
erik.corry@gmail.comd6076d92011-06-06 09:39:18 +0000151 inline Isolate* isolate() { return isolate_; }
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000152 inline Zone* zone() const { return zone_; }
danno@chromium.org40cb8782011-05-25 07:58:50 +0000153
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000154 static const int kInitialSpecialStringLength = 1024;
danno@chromium.org40cb8782011-05-25 07:58:50 +0000155
156
157 private:
158 Handle<String> source_;
159 int source_length_;
160 Handle<SeqAsciiString> seq_source_;
161
danno@chromium.org40cb8782011-05-25 07:58:50 +0000162 Isolate* isolate_;
163 uc32 c0_;
164 int position_;
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000165 Zone* zone_;
danno@chromium.org40cb8782011-05-25 07:58:50 +0000166};
167
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000168template <bool seq_ascii>
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000169Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source,
170 Zone* zone) {
erik.corry@gmail.comc3b670f2011-10-05 21:44:48 +0000171 isolate_ = source->map()->GetHeap()->isolate();
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000172 zone_ = zone;
ricow@chromium.org2c99e282011-07-28 09:15:17 +0000173 FlattenString(source);
174 source_ = source;
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000175 source_length_ = source_->length();
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000176
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000177 // Optimized fast case where we only have ASCII characters.
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000178 if (seq_ascii) {
179 seq_source_ = Handle<SeqAsciiString>::cast(source_);
180 }
181
182 // Set initial position right before the string.
183 position_ = -1;
ulan@chromium.org2efb9002012-01-19 15:36:35 +0000184 // Advance to the first character (possibly EOS)
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000185 AdvanceSkipWhitespace();
186 Handle<Object> result = ParseJsonValue();
187 if (result.is_null() || c0_ != kEndOfString) {
188 // Parse failed. Current character is the unexpected token.
189
190 const char* message;
191 Factory* factory = isolate()->factory();
192 Handle<JSArray> array;
193
194 switch (c0_) {
195 case kEndOfString:
196 message = "unexpected_eos";
197 array = factory->NewJSArray(0);
198 break;
199 case '-':
200 case '0':
201 case '1':
202 case '2':
203 case '3':
204 case '4':
205 case '5':
206 case '6':
207 case '7':
208 case '8':
209 case '9':
210 message = "unexpected_token_number";
211 array = factory->NewJSArray(0);
212 break;
213 case '"':
214 message = "unexpected_token_string";
215 array = factory->NewJSArray(0);
216 break;
217 default:
218 message = "unexpected_token";
219 Handle<Object> name = LookupSingleCharacterStringFromCode(c0_);
220 Handle<FixedArray> element = factory->NewFixedArray(1);
221 element->set(0, *name);
222 array = factory->NewJSArrayWithElements(element);
223 break;
224 }
225
226 MessageLocation location(factory->NewScript(source),
227 position_,
228 position_ + 1);
229 Handle<Object> result = factory->NewSyntaxError(message, array);
230 isolate()->Throw(*result, &location);
231 return Handle<Object>::null();
232 }
233 return result;
234}
235
236
237// Parse any JSON value.
238template <bool seq_ascii>
239Handle<Object> JsonParser<seq_ascii>::ParseJsonValue() {
240 switch (c0_) {
241 case '"':
242 return ParseJsonString();
243 case '-':
244 case '0':
245 case '1':
246 case '2':
247 case '3':
248 case '4':
249 case '5':
250 case '6':
251 case '7':
252 case '8':
253 case '9':
254 return ParseJsonNumber();
255 case 'f':
256 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
257 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
258 AdvanceSkipWhitespace();
259 return isolate()->factory()->false_value();
260 } else {
261 return ReportUnexpectedCharacter();
262 }
263 case 't':
264 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
265 AdvanceGetChar() == 'e') {
266 AdvanceSkipWhitespace();
267 return isolate()->factory()->true_value();
268 } else {
269 return ReportUnexpectedCharacter();
270 }
271 case 'n':
272 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
273 AdvanceGetChar() == 'l') {
274 AdvanceSkipWhitespace();
275 return isolate()->factory()->null_value();
276 } else {
277 return ReportUnexpectedCharacter();
278 }
279 case '{':
280 return ParseJsonObject();
281 case '[':
282 return ParseJsonArray();
283 default:
284 return ReportUnexpectedCharacter();
285 }
286}
287
288
289// Parse a JSON object. Position must be right at '{'.
290template <bool seq_ascii>
291Handle<Object> JsonParser<seq_ascii>::ParseJsonObject() {
292 Handle<JSFunction> object_constructor(
293 isolate()->global_context()->object_function());
294 Handle<JSObject> json_object =
295 isolate()->factory()->NewJSObject(object_constructor);
296 ASSERT_EQ(c0_, '{');
297
298 AdvanceSkipWhitespace();
299 if (c0_ != '}') {
300 do {
301 if (c0_ != '"') return ReportUnexpectedCharacter();
302 Handle<String> key = ParseJsonSymbol();
303 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();
304 AdvanceSkipWhitespace();
305 Handle<Object> value = ParseJsonValue();
306 if (value.is_null()) return ReportUnexpectedCharacter();
307
308 uint32_t index;
309 if (key->AsArrayIndex(&index)) {
erik.corry@gmail.comf2038fb2012-01-16 11:42:08 +0000310 JSObject::SetOwnElement(json_object, index, value, kNonStrictMode);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000311 } else if (key->Equals(isolate()->heap()->Proto_symbol())) {
312 SetPrototype(json_object, value);
313 } else {
erik.corry@gmail.comf2038fb2012-01-16 11:42:08 +0000314 JSObject::SetLocalPropertyIgnoreAttributes(
315 json_object, key, value, NONE);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000316 }
317 } while (MatchSkipWhiteSpace(','));
318 if (c0_ != '}') {
319 return ReportUnexpectedCharacter();
320 }
321 }
322 AdvanceSkipWhitespace();
323 return json_object;
324}
325
326// Parse a JSON array. Position must be right at '['.
327template <bool seq_ascii>
328Handle<Object> JsonParser<seq_ascii>::ParseJsonArray() {
yangguo@chromium.org5a11aaf2012-06-20 11:29:00 +0000329 ZoneScope zone_scope(zone(), DELETE_ON_EXIT);
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000330 ZoneList<Handle<Object> > elements(4, zone());
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000331 ASSERT_EQ(c0_, '[');
332
333 AdvanceSkipWhitespace();
334 if (c0_ != ']') {
335 do {
336 Handle<Object> element = ParseJsonValue();
337 if (element.is_null()) return ReportUnexpectedCharacter();
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000338 elements.Add(element, zone());
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000339 } while (MatchSkipWhiteSpace(','));
340 if (c0_ != ']') {
341 return ReportUnexpectedCharacter();
342 }
343 }
344 AdvanceSkipWhitespace();
345 // Allocate a fixed array with all the elements.
346 Handle<FixedArray> fast_elements =
347 isolate()->factory()->NewFixedArray(elements.length());
348 for (int i = 0, n = elements.length(); i < n; i++) {
349 fast_elements->set(i, *elements[i]);
350 }
351 return isolate()->factory()->NewJSArrayWithElements(fast_elements);
352}
353
354
355template <bool seq_ascii>
356Handle<Object> JsonParser<seq_ascii>::ParseJsonNumber() {
357 bool negative = false;
ager@chromium.org04921a82011-06-27 13:21:41 +0000358 int beg_pos = position_;
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000359 if (c0_ == '-') {
360 Advance();
361 negative = true;
362 }
363 if (c0_ == '0') {
364 Advance();
365 // Prefix zero is only allowed if it's the only digit before
366 // a decimal point or exponent.
367 if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter();
368 } else {
369 int i = 0;
370 int digits = 0;
371 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
372 do {
373 i = i * 10 + c0_ - '0';
374 digits++;
375 Advance();
376 } while (c0_ >= '0' && c0_ <= '9');
377 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000378 SkipWhitespace();
ager@chromium.org04921a82011-06-27 13:21:41 +0000379 return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate());
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000380 }
381 }
382 if (c0_ == '.') {
383 Advance();
384 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
385 do {
386 Advance();
387 } while (c0_ >= '0' && c0_ <= '9');
388 }
389 if (AsciiAlphaToLower(c0_) == 'e') {
390 Advance();
391 if (c0_ == '-' || c0_ == '+') Advance();
392 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
393 do {
394 Advance();
395 } while (c0_ >= '0' && c0_ <= '9');
396 }
ager@chromium.org04921a82011-06-27 13:21:41 +0000397 int length = position_ - beg_pos;
398 double number;
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000399 if (seq_ascii) {
ager@chromium.org04921a82011-06-27 13:21:41 +0000400 Vector<const char> chars(seq_source_->GetChars() + beg_pos, length);
401 number = StringToDouble(isolate()->unicode_cache(),
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000402 chars,
403 NO_FLAGS, // Hex, octal or trailing junk.
404 OS::nan_value());
405 } else {
406 Vector<char> buffer = Vector<char>::New(length);
ager@chromium.org04921a82011-06-27 13:21:41 +0000407 String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000408 Vector<const char> result =
409 Vector<const char>(reinterpret_cast<const char*>(buffer.start()),
410 length);
ager@chromium.org04921a82011-06-27 13:21:41 +0000411 number = StringToDouble(isolate()->unicode_cache(),
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000412 result,
413 NO_FLAGS, // Hex, octal or trailing junk.
414 0.0);
415 buffer.Dispose();
416 }
417 SkipWhitespace();
ager@chromium.org04921a82011-06-27 13:21:41 +0000418 return isolate()->factory()->NewNumber(number);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000419}
420
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000421
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000422template <typename StringType>
423inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);
424
425template <>
426inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
427 seq_str->SeqTwoByteStringSet(i, c);
428}
429
430template <>
431inline void SeqStringSet(Handle<SeqAsciiString> seq_str, int i, uc32 c) {
432 seq_str->SeqAsciiStringSet(i, c);
433}
434
435template <typename StringType>
436inline Handle<StringType> NewRawString(Factory* factory, int length);
437
438template <>
439inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length) {
440 return factory->NewRawTwoByteString(length, NOT_TENURED);
441}
442
443template <>
444inline Handle<SeqAsciiString> NewRawString(Factory* factory, int length) {
445 return factory->NewRawAsciiString(length, NOT_TENURED);
446}
447
448
449// Scans the rest of a JSON string starting from position_ and writes
450// prefix[start..end] along with the scanned characters into a
451// sequential string of type StringType.
452template <bool seq_ascii>
453template <typename StringType, typename SinkChar>
454Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(
455 Handle<String> prefix, int start, int end) {
456 int count = end - start;
457 int max_length = count + source_length_ - position_;
458 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count));
459 Handle<StringType> seq_str = NewRawString<StringType>(isolate()->factory(),
460 length);
461 // Copy prefix into seq_str.
462 SinkChar* dest = seq_str->GetChars();
463 String::WriteToFlat(*prefix, dest, start, end);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000464
465 while (c0_ != '"') {
kmillikin@chromium.org7c2628c2011-08-10 11:27:35 +0000466 // Check for control character (0x00-0x1f) or unterminated string (<0).
467 if (c0_ < 0x20) return Handle<String>::null();
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000468 if (count >= length) {
469 // We need to create a longer sequential string for the result.
470 return SlowScanJsonString<StringType, SinkChar>(seq_str, 0, count);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000471 }
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000472 if (c0_ != '\\') {
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000473 // If the sink can contain UC16 characters, or source_ contains only
474 // ASCII characters, there's no need to test whether we can store the
475 // character. Otherwise check whether the UC16 source character can fit
476 // in the ASCII sink.
477 if (sizeof(SinkChar) == kUC16Size ||
478 seq_ascii ||
479 c0_ <= kMaxAsciiCharCode) {
480 SeqStringSet(seq_str, count++, c0_);
481 Advance();
482 } else {
483 // StringType is SeqAsciiString and we just read a non-ASCII char.
484 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, 0, count);
485 }
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000486 } else {
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000487 Advance(); // Advance past the \.
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000488 switch (c0_) {
489 case '"':
490 case '\\':
491 case '/':
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000492 SeqStringSet(seq_str, count++, c0_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000493 break;
494 case 'b':
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000495 SeqStringSet(seq_str, count++, '\x08');
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000496 break;
497 case 'f':
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000498 SeqStringSet(seq_str, count++, '\x0c');
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000499 break;
500 case 'n':
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000501 SeqStringSet(seq_str, count++, '\x0a');
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000502 break;
503 case 'r':
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000504 SeqStringSet(seq_str, count++, '\x0d');
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000505 break;
506 case 't':
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000507 SeqStringSet(seq_str, count++, '\x09');
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000508 break;
509 case 'u': {
510 uc32 value = 0;
511 for (int i = 0; i < 4; i++) {
512 Advance();
513 int digit = HexValue(c0_);
514 if (digit < 0) {
515 return Handle<String>::null();
516 }
517 value = value * 16 + digit;
518 }
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000519 if (sizeof(SinkChar) == kUC16Size || value <= kMaxAsciiCharCode) {
520 SeqStringSet(seq_str, count++, value);
521 break;
522 } else {
523 // StringType is SeqAsciiString and we just read a non-ASCII char.
524 position_ -= 6; // Rewind position_ to \ in \uxxxx.
525 Advance();
526 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str,
527 0,
528 count);
529 }
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000530 }
531 default:
532 return Handle<String>::null();
533 }
534 Advance();
535 }
536 }
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000537 // Shrink seq_string length to count.
538 if (isolate()->heap()->InNewSpace(*seq_str)) {
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000539 isolate()->heap()->new_space()->
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000540 template ShrinkStringAtAllocationBoundary<StringType>(
541 *seq_str, count);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000542 } else {
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000543 int string_size = StringType::SizeFor(count);
544 int allocated_string_size = StringType::SizeFor(length);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000545 int delta = allocated_string_size - string_size;
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000546 Address start_filler_object = seq_str->address() + string_size;
547 seq_str->set_length(count);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000548 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
549 }
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000550 ASSERT_EQ('"', c0_);
551 // Advance past the last '"'.
552 AdvanceSkipWhitespace();
553 return seq_str;
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000554}
555
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000556
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000557template <bool seq_ascii>
558template <bool is_symbol>
559Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
560 ASSERT_EQ('"', c0_);
561 Advance();
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000562 if (c0_ == '"') {
563 AdvanceSkipWhitespace();
564 return Handle<String>(isolate()->heap()->empty_string());
565 }
ager@chromium.org04921a82011-06-27 13:21:41 +0000566 int beg_pos = position_;
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000567 // Fast case for ASCII only without escape characters.
568 do {
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000569 // Check for control character (0x00-0x1f) or unterminated string (<0).
570 if (c0_ < 0x20) return Handle<String>::null();
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000571 if (c0_ != '\\') {
572 if (seq_ascii || c0_ <= kMaxAsciiCharCode) {
573 Advance();
574 } else {
575 return SlowScanJsonString<SeqTwoByteString, uc16>(source_,
576 beg_pos,
577 position_);
578 }
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000579 } else {
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000580 return SlowScanJsonString<SeqAsciiString, char>(source_,
581 beg_pos,
582 position_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000583 }
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000584 } while (c0_ != '"');
585 int length = position_ - beg_pos;
586 Handle<String> result;
587 if (seq_ascii && is_symbol) {
588 result = isolate()->factory()->LookupAsciiSymbol(seq_source_,
589 beg_pos,
590 length);
591 } else {
592 result = isolate()->factory()->NewRawAsciiString(length);
593 char* dest = SeqAsciiString::cast(*result)->GetChars();
594 String::WriteToFlat(*source_, dest, beg_pos, position_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000595 }
596 ASSERT_EQ('"', c0_);
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000597 // Advance past the last '"'.
598 AdvanceSkipWhitespace();
vegorov@chromium.org3cf47312011-06-29 13:20:01 +0000599 return result;
jkummerow@chromium.orge297f592011-06-08 10:05:15 +0000600}
601
danno@chromium.org40cb8782011-05-25 07:58:50 +0000602} } // namespace v8::internal
603
604#endif // V8_JSON_PARSER_H_