blob: df916751386848cb27cac043b9f97651c0a9dc85 [file] [log] [blame]
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31#include <google/protobuf/util/internal/json_stream_parser.h>
32
33#include <algorithm>
34#include <cctype>
35#include <cerrno>
36#include <cstdlib>
37#include <cstring>
38#include <memory>
39#ifndef _SHARED_PTR_H
40#include <google/protobuf/stubs/shared_ptr.h>
41#endif
42
Feng Xiaoeee38b02015-08-22 18:25:48 -070043#include <google/protobuf/stubs/logging.h>
Feng Xiaoe96ff302015-06-15 18:21:48 -070044#include <google/protobuf/stubs/common.h>
45#include <google/protobuf/stubs/strutil.h>
46#include <google/protobuf/util/internal/object_writer.h>
47
48namespace google {
49namespace protobuf {
50namespace util {
51
// Allow these symbols to be referenced as util::Status, util::error::* in
// this file. The nested `error` namespace re-exports only the two error codes
// named below.
using util::Status;
namespace error {
using util::error::INTERNAL;
using util::error::INVALID_ARGUMENT;
} // namespace error
59
60namespace converter {
61
// Total number of characters in a unicode escape sequence: the backslash, the
// letter 'u', and the four hex digits that follow them.
static const int kUnicodeEscapedLength = 6;

// Lengths of the true, false, and null literals, not counting the terminating
// NUL. They are derived with sizeof so that they are compile-time constants
// instead of values computed by strlen() during static initialization.
static const int true_len = sizeof("true") - 1;
static const int false_len = sizeof("false") - 1;
static const int null_len = sizeof("null") - 1;
69
// Returns true when |c| is an ASCII letter, an underscore, or a dollar sign.
// These are the characters accepted as the first character of a bare key.
inline bool IsLetter(char c) {
  if (c == '_' || c == '$') return true;
  const bool is_lower = (c >= 'a' && c <= 'z');
  const bool is_upper = (c >= 'A' && c <= 'Z');
  return is_lower || is_upper;
}
74
75inline bool IsAlphanumeric(char c) {
76 return IsLetter(c) || ('0' <= c && c <= '9');
77}
78
79static bool ConsumeKey(StringPiece* input, StringPiece* key) {
80 if (input->empty() || !IsLetter((*input)[0])) return false;
81 int len = 1;
82 for (; len < input->size(); ++len) {
83 if (!IsAlphanumeric((*input)[len])) {
84 break;
85 }
86 }
87 *key = StringPiece(input->data(), len);
88 *input = StringPiece(input->data() + len, input->size() - len);
89 return true;
90}
91
92static bool MatchKey(StringPiece input) {
93 return !input.empty() && IsLetter(input[0]);
94}
95
// Constructs a parser that reports what it parses to |ow|. All buffers start
// empty, finishing_ and coerce_to_utf8_ start false, and string_open_ starts
// at 0 (no string currently open).
JsonStreamParser::JsonStreamParser(ObjectWriter* ow)
    : ow_(ow),
      stack_(),
      leftover_(),
      json_(),
      p_(),
      key_(),
      key_storage_(),
      finishing_(false),
      parsed_(),
      parsed_storage_(),
      string_open_(0),
      chunk_storage_(),
      coerce_to_utf8_(false) {
  // Initialize the stack with a single value to be parsed.
  stack_.push(VALUE);
}
113
// Nothing to release explicitly; the body is empty.
JsonStreamParser::~JsonStreamParser() {}
115
Feng Xiaoeee38b02015-08-22 18:25:48 -0700116
Feng Xiaoe96ff302015-06-15 18:21:48 -0700117util::Status JsonStreamParser::Parse(StringPiece json) {
Feng Xiaoeee38b02015-08-22 18:25:48 -0700118 StringPiece chunk = json;
119 // If we have leftovers from a previous chunk, append the new chunk to it
120 // and create a new StringPiece pointing at the string's data. This could
121 // be large but we rely on the chunks to be small, assuming they are
122 // fragments of a Cord.
123 if (!leftover_.empty()) {
124 // Don't point chunk to leftover_ because leftover_ will be updated in
125 // ParseChunk(chunk).
126 chunk_storage_.swap(leftover_);
127 json.AppendToString(&chunk_storage_);
128 chunk = StringPiece(chunk_storage_);
129 }
130
131 // Find the structurally valid UTF8 prefix and parse only that.
132 int n = internal::UTF8SpnStructurallyValid(chunk);
133 if (n > 0) {
134 util::Status status = ParseChunk(chunk.substr(0, n));
135
136 // Any leftover characters are stashed in leftover_ for later parsing when
137 // there is more data available.
138 chunk.substr(n).AppendToString(&leftover_);
139 return status;
140 } else {
141 chunk.CopyToString(&leftover_);
142 return util::Status::OK;
143 }
Feng Xiaoe96ff302015-06-15 18:21:48 -0700144}
145
146util::Status JsonStreamParser::FinishParse() {
147 // If we do not expect anything and there is nothing left to parse we're all
148 // done.
149 if (stack_.empty() && leftover_.empty()) {
150 return util::Status::OK;
151 }
Feng Xiaoeee38b02015-08-22 18:25:48 -0700152
153 // Storage for UTF8-coerced string.
154 google::protobuf::scoped_array<char> utf8;
155 if (coerce_to_utf8_) {
156 utf8.reset(new char[leftover_.size()]);
157 char* coerced = internal::UTF8CoerceToStructurallyValid(leftover_, utf8.get(), ' ');
158 p_ = json_ = StringPiece(coerced, leftover_.size());
159 } else {
Feng Xiaoe841bac2015-12-11 17:09:20 -0800160 p_ = json_ = leftover_;
Feng Xiaoeee38b02015-08-22 18:25:48 -0700161 if (!internal::IsStructurallyValidUTF8(leftover_)) {
162 return ReportFailure("Encountered non UTF-8 code points.");
163 }
Feng Xiaoeee38b02015-08-22 18:25:48 -0700164 }
165
Feng Xiaoe96ff302015-06-15 18:21:48 -0700166 // Parse the remainder in finishing mode, which reports errors for things like
167 // unterminated strings or unknown tokens that would normally be retried.
Feng Xiaoe96ff302015-06-15 18:21:48 -0700168 finishing_ = true;
169 util::Status result = RunParser();
170 if (result.ok()) {
171 SkipWhitespace();
172 if (!p_.empty()) {
173 result = ReportFailure("Parsing terminated before end of input.");
174 }
175 }
176 return result;
177}
178
179util::Status JsonStreamParser::ParseChunk(StringPiece chunk) {
Feng Xiaoeee38b02015-08-22 18:25:48 -0700180 // Do not do any work if the chunk is empty.
181 if (chunk.empty()) return util::Status::OK;
182
183 p_ = json_ = chunk;
Feng Xiaoe96ff302015-06-15 18:21:48 -0700184
185 finishing_ = false;
186 util::Status result = RunParser();
187 if (!result.ok()) return result;
188
189 SkipWhitespace();
190 if (p_.empty()) {
191 // If we parsed everything we had, clear the leftover.
192 leftover_.clear();
193 } else {
194 // If we do not expect anything i.e. stack is empty, and we have non-empty
195 // string left to parse, we report an error.
196 if (stack_.empty()) {
197 return ReportFailure("Parsing terminated before end of input.");
198 }
199 // If we expect future data i.e. stack is non-empty, and we have some
200 // unparsed data left, we save it for later parse.
201 leftover_ = p_.ToString();
202 }
203 return util::Status::OK;
204}
205
206util::Status JsonStreamParser::RunParser() {
207 while (!stack_.empty()) {
208 ParseType type = stack_.top();
209 TokenType t = (string_open_ == 0) ? GetNextTokenType() : BEGIN_STRING;
210 stack_.pop();
211 util::Status result;
212 switch (type) {
213 case VALUE:
214 result = ParseValue(t);
215 break;
216
217 case OBJ_MID:
218 result = ParseObjectMid(t);
219 break;
220
221 case ENTRY:
222 result = ParseEntry(t);
223 break;
224
225 case ENTRY_MID:
226 result = ParseEntryMid(t);
227 break;
228
229 case ARRAY_VALUE:
230 result = ParseArrayValue(t);
231 break;
232
233 case ARRAY_MID:
234 result = ParseArrayMid(t);
235 break;
236
237 default:
238 result = util::Status(util::error::INTERNAL,
239 StrCat("Unknown parse type: ", type));
240 break;
241 }
242 if (!result.ok()) {
243 // If we were cancelled, save our state and try again later.
244 if (!finishing_ && result == util::Status::CANCELLED) {
245 stack_.push(type);
246 // If we have a key we still need to render, make sure to save off the
247 // contents in our own storage.
248 if (!key_.empty() && key_storage_.empty()) {
249 key_.AppendToString(&key_storage_);
250 key_ = StringPiece(key_storage_);
251 }
252 result = util::Status::OK;
253 }
254 return result;
255 }
256 }
257 return util::Status::OK;
258}
259
260util::Status JsonStreamParser::ParseValue(TokenType type) {
261 switch (type) {
262 case BEGIN_OBJECT:
263 return HandleBeginObject();
264 case BEGIN_ARRAY:
265 return HandleBeginArray();
266 case BEGIN_STRING:
267 return ParseString();
268 case BEGIN_NUMBER:
269 return ParseNumber();
270 case BEGIN_TRUE:
271 return ParseTrue();
272 case BEGIN_FALSE:
273 return ParseFalse();
274 case BEGIN_NULL:
275 return ParseNull();
276 case UNKNOWN:
277 return ReportUnknown("Expected a value.");
278 default: {
279 // Special case for having been cut off while parsing, wait for more data.
280 // This handles things like 'fals' being at the end of the string, we
281 // don't know if the next char would be e, completing it, or something
282 // else, making it invalid.
283 if (!finishing_ && p_.length() < false_len) {
284 return util::Status::CANCELLED;
285 }
286 return ReportFailure("Unexpected token.");
287 }
288 }
289}
290
291util::Status JsonStreamParser::ParseString() {
292 util::Status result = ParseStringHelper();
293 if (result.ok()) {
294 ow_->RenderString(key_, parsed_);
295 key_.clear();
296 parsed_.clear();
297 parsed_storage_.clear();
298 }
299 return result;
300}
301
// Parses a quoted string starting at p_ and leaves the unescaped contents in
// parsed_.
//
// The string may be delimited by double or single quotes; the opening quote is
// remembered in string_open_. If the input runs out before the closing quote
// and we are not finishing, the text seen so far is kept in parsed_storage_,
// string_open_ stays set, and CANCELLED is returned so that a later call
// resumes inside the same string. In finishing mode an unterminated string is
// reported as a failure.
util::Status JsonStreamParser::ParseStringHelper() {
  // If we haven't seen the start quote, grab it and remember it for later.
  if (string_open_ == 0) {
    string_open_ = *p_.data();
    GOOGLE_DCHECK(string_open_ == '\"' || string_open_ == '\'');
    Advance();
  }
  // Track where we last copied data from so we can minimize copying.
  const char* last = p_.data();
  while (!p_.empty()) {
    const char* data = p_.data();
    if (*data == '\\') {
      // We're about to handle an escape, copy all bytes from last to data.
      if (last < data) {
        parsed_storage_.append(last, data - last);
        last = data;
      }
      // If we ran out of string after the \, cancel or report an error
      // depending on if we expect more data later.
      if (p_.length() == 1) {
        if (!finishing_) {
          return util::Status::CANCELLED;
        }
        return ReportFailure("Closing quote expected in string.");
      }
      // Parse a unicode escape if we found \u in the string.
      if (data[1] == 'u') {
        util::Status result = ParseUnicodeEscape();
        if (!result.ok()) {
          return result;
        }
        // Move last pointer past the unicode escape and continue.
        last = p_.data();
        continue;
      }
      // Handle the standard set of backslash-escaped characters. Any other
      // escaped character is copied through as itself.
      switch (data[1]) {
        case 'b':
          parsed_storage_.push_back('\b');
          break;
        case 'f':
          parsed_storage_.push_back('\f');
          break;
        case 'n':
          parsed_storage_.push_back('\n');
          break;
        case 'r':
          parsed_storage_.push_back('\r');
          break;
        case 't':
          parsed_storage_.push_back('\t');
          break;
        case 'v':
          parsed_storage_.push_back('\v');
          break;
        default:
          parsed_storage_.push_back(data[1]);
      }
      // We handled two characters, so advance past them and continue.
      p_.remove_prefix(2);
      last = p_.data();
      continue;
    }
    // If we found the closing quote note it, advance past it, and return.
    if (*data == string_open_) {
      // If we didn't copy anything, reuse the input buffer.
      if (parsed_storage_.empty()) {
        parsed_ = StringPiece(last, data - last);
      } else {
        // Something was already copied, so append the uncopied tail and
        // expose the owned buffer instead.
        if (last < data) {
          parsed_storage_.append(last, data - last);
          last = data;
        }
        parsed_ = StringPiece(parsed_storage_);
      }
      // Clear the quote char so next time we try to parse a string we'll
      // start fresh.
      string_open_ = 0;
      Advance();
      return util::Status::OK;
    }
    // Normal character, just advance past it.
    Advance();
  }
  // If we ran out of characters, copy over what we have so far.
  if (last < p_.data()) {
    parsed_storage_.append(last, p_.data() - last);
  }
  // If we didn't find the closing quote but we expect more data, cancel for now
  if (!finishing_) {
    return util::Status::CANCELLED;
  }
  // End of string reached without a closing quote, report an error.
  string_open_ = 0;
  return ReportFailure("Closing quote expected in string.");
}
398
399// Converts a unicode escaped character to a decimal value stored in a char32
400// for use in UTF8 encoding utility. We assume that str begins with \uhhhh and
401// convert that from the hex number to a decimal value.
402//
403// There are some security exploits with UTF-8 that we should be careful of:
404// - http://www.unicode.org/reports/tr36/#UTF-8_Exploit
405// - http://sites/intl-eng/design-guide/core-application
406util::Status JsonStreamParser::ParseUnicodeEscape() {
407 if (p_.length() < kUnicodeEscapedLength) {
408 if (!finishing_) {
409 return util::Status::CANCELLED;
410 }
411 return ReportFailure("Illegal hex string.");
412 }
413 GOOGLE_DCHECK_EQ('\\', p_.data()[0]);
414 GOOGLE_DCHECK_EQ('u', p_.data()[1]);
415 uint32 code = 0;
416 for (int i = 2; i < kUnicodeEscapedLength; ++i) {
417 if (!isxdigit(p_.data()[i])) {
418 return ReportFailure("Invalid escape sequence.");
419 }
420 code = (code << 4) + hex_digit_to_int(p_.data()[i]);
421 }
422 char buf[UTFmax];
423 int len = EncodeAsUTF8Char(code, buf);
424 // Advance past the unicode escape.
425 p_.remove_prefix(kUnicodeEscapedLength);
426 parsed_storage_.append(buf, len);
427 return util::Status::OK;
428}
429
430util::Status JsonStreamParser::ParseNumber() {
431 NumberResult number;
432 util::Status result = ParseNumberHelper(&number);
433 if (result.ok()) {
434 switch (number.type) {
435 case NumberResult::DOUBLE:
436 ow_->RenderDouble(key_, number.double_val);
437 key_.clear();
438 break;
439
440 case NumberResult::INT:
441 ow_->RenderInt64(key_, number.int_val);
442 key_.clear();
443 break;
444
445 case NumberResult::UINT:
446 ow_->RenderUint64(key_, number.uint_val);
447 key_.clear();
448 break;
449
450 default:
451 return ReportFailure("Unable to parse number.");
452 }
453 }
454 return result;
455}
456
457util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
458 const char* data = p_.data();
459 int length = p_.length();
460
461 // Look for the first non-numeric character, or the end of the string.
462 int index = 0;
463 bool floating = false;
464 bool negative = data[index] == '-';
465 // Find the first character that cannot be part of the number. Along the way
466 // detect if the number needs to be parsed as a double.
467 // Note that this restricts numbers to the JSON specification, so for example
468 // we do not support hex or octal notations.
469 for (; index < length; ++index) {
470 char c = data[index];
471 if (isdigit(c)) continue;
472 if (c == '.' || c == 'e' || c == 'E') {
473 floating = true;
474 continue;
475 }
476 if (c == '+' || c == '-') continue;
477 // Not a valid number character, break out.
478 break;
479 }
480
481 // If the entire input is a valid number, and we may have more content in the
482 // future, we abort for now and resume when we know more.
483 if (index == length && !finishing_) {
484 return util::Status::CANCELLED;
485 }
486
487 // Create a string containing just the number, so we can use safe_strtoX
488 string number = p_.substr(0, index).ToString();
489
490 // Floating point number, parse as a double.
491 if (floating) {
492 if (!safe_strtod(number, &result->double_val)) {
493 return ReportFailure("Unable to parse number.");
494 }
495 result->type = NumberResult::DOUBLE;
496 p_.remove_prefix(index);
497 return util::Status::OK;
498 }
499
500 // Positive non-floating point number, parse as a uint64.
501 if (!negative) {
502 if (!safe_strtou64(number, &result->uint_val)) {
503 return ReportFailure("Unable to parse number.");
504 }
505 result->type = NumberResult::UINT;
506 p_.remove_prefix(index);
507 return util::Status::OK;
508 }
509
510 // Negative non-floating point number, parse as an int64.
511 if (!safe_strto64(number, &result->int_val)) {
512 return ReportFailure("Unable to parse number.");
513 }
514 result->type = NumberResult::INT;
515 p_.remove_prefix(index);
516 return util::Status::OK;
517}
518
519util::Status JsonStreamParser::HandleBeginObject() {
520 GOOGLE_DCHECK_EQ('{', *p_.data());
521 Advance();
522 ow_->StartObject(key_);
523 key_.clear();
524 stack_.push(ENTRY);
525 return util::Status::OK;
526}
527
528util::Status JsonStreamParser::ParseObjectMid(TokenType type) {
529 if (type == UNKNOWN) {
530 return ReportUnknown("Expected , or } after key:value pair.");
531 }
532
533 // Object is complete, advance past the comma and render the EndObject.
534 if (type == END_OBJECT) {
535 Advance();
536 ow_->EndObject();
537 return util::Status::OK;
538 }
539 // Found a comma, advance past it and get ready for an entry.
540 if (type == VALUE_SEPARATOR) {
541 Advance();
542 stack_.push(ENTRY);
543 return util::Status::OK;
544 }
545 // Illegal token after key:value pair.
546 return ReportFailure("Expected , or } after key:value pair.");
547}
548
549util::Status JsonStreamParser::ParseEntry(TokenType type) {
550 if (type == UNKNOWN) {
551 return ReportUnknown("Expected an object key or }.");
552 }
553
554 // Close the object and return. This allows for trailing commas.
555 if (type == END_OBJECT) {
556 ow_->EndObject();
557 Advance();
558 return util::Status::OK;
559 }
560
561 util::Status result;
562 if (type == BEGIN_STRING) {
563 // Key is a string (standard JSON), parse it and store the string.
564 result = ParseStringHelper();
565 if (result.ok()) {
566 key_storage_.clear();
567 if (!parsed_storage_.empty()) {
568 parsed_storage_.swap(key_storage_);
569 key_ = StringPiece(key_storage_);
570 } else {
571 key_ = parsed_;
572 }
573 parsed_.clear();
574 }
575 } else if (type == BEGIN_KEY) {
576 // Key is a bare key (back compat), create a StringPiece pointing to it.
577 result = ParseKey();
578 } else {
579 // Unknown key type, report an error.
580 result = ReportFailure("Expected an object key or }.");
581 }
582 // On success we next expect an entry mid ':' then an object mid ',' or '}'
583 if (result.ok()) {
584 stack_.push(OBJ_MID);
585 stack_.push(ENTRY_MID);
586 }
587 return result;
588}
589
590util::Status JsonStreamParser::ParseEntryMid(TokenType type) {
591 if (type == UNKNOWN) {
592 return ReportUnknown("Expected : between key:value pair.");
593 }
594 if (type == ENTRY_SEPARATOR) {
595 Advance();
596 stack_.push(VALUE);
597 return util::Status::OK;
598 }
599 return ReportFailure("Expected : between key:value pair.");
600}
601
602util::Status JsonStreamParser::HandleBeginArray() {
603 GOOGLE_DCHECK_EQ('[', *p_.data());
604 Advance();
605 ow_->StartList(key_);
606 key_.clear();
607 stack_.push(ARRAY_VALUE);
608 return util::Status::OK;
609}
610
611util::Status JsonStreamParser::ParseArrayValue(TokenType type) {
612 if (type == UNKNOWN) {
613 return ReportUnknown("Expected a value or ] within an array.");
614 }
615
616 if (type == END_ARRAY) {
617 ow_->EndList();
618 Advance();
619 return util::Status::OK;
620 }
621
622 // The ParseValue call may push something onto the stack so we need to make
623 // sure an ARRAY_MID is after it, so we push it on now.
624 stack_.push(ARRAY_MID);
625 util::Status result = ParseValue(type);
626 if (result == util::Status::CANCELLED) {
627 // If we were cancelled, pop back off the ARRAY_MID so we don't try to
628 // push it on again when we try over.
629 stack_.pop();
630 }
631 return result;
632}
633
634util::Status JsonStreamParser::ParseArrayMid(TokenType type) {
635 if (type == UNKNOWN) {
636 return ReportUnknown("Expected , or ] after array value.");
637 }
638
639 if (type == END_ARRAY) {
640 ow_->EndList();
641 Advance();
642 return util::Status::OK;
643 }
644
645 // Found a comma, advance past it and expect an array value next.
646 if (type == VALUE_SEPARATOR) {
647 Advance();
648 stack_.push(ARRAY_VALUE);
649 return util::Status::OK;
650 }
651 // Illegal token after array value.
652 return ReportFailure("Expected , or ] after array value.");
653}
654
655util::Status JsonStreamParser::ParseTrue() {
656 ow_->RenderBool(key_, true);
657 key_.clear();
658 p_.remove_prefix(true_len);
659 return util::Status::OK;
660}
661
662util::Status JsonStreamParser::ParseFalse() {
663 ow_->RenderBool(key_, false);
664 key_.clear();
665 p_.remove_prefix(false_len);
666 return util::Status::OK;
667}
668
669util::Status JsonStreamParser::ParseNull() {
670 ow_->RenderNull(key_);
671 key_.clear();
672 p_.remove_prefix(null_len);
673 return util::Status::OK;
674}
675
676util::Status JsonStreamParser::ReportFailure(StringPiece message) {
677 static const int kContextLength = 20;
678 const char* p_start = p_.data();
679 const char* json_start = json_.data();
680 const char* begin = max(p_start - kContextLength, json_start);
681 const char* end = min(p_start + kContextLength, json_start + json_.size());
682 StringPiece segment(begin, end - begin);
683 string location(p_start - begin, ' ');
684 location.push_back('^');
685 return util::Status(util::error::INVALID_ARGUMENT,
686 StrCat(message, "\n", segment, "\n", location));
687}
688
689util::Status JsonStreamParser::ReportUnknown(StringPiece message) {
690 // If we aren't finishing the parse, cancel parsing and try later.
691 if (!finishing_) {
692 return util::Status::CANCELLED;
693 }
694 if (p_.empty()) {
695 return ReportFailure(StrCat("Unexpected end of string. ", message));
696 }
697 return ReportFailure(message);
698}
699
700void JsonStreamParser::SkipWhitespace() {
701 while (!p_.empty() && ascii_isspace(*p_.data())) {
702 Advance();
703 }
704}
705
706void JsonStreamParser::Advance() {
707 // Advance by moving one UTF8 character while making sure we don't go beyond
708 // the length of StringPiece.
709 p_.remove_prefix(
710 min<int>(p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length())));
711}
712
713util::Status JsonStreamParser::ParseKey() {
714 StringPiece original = p_;
715 if (!ConsumeKey(&p_, &key_)) {
716 return ReportFailure("Invalid key or variable name.");
717 }
718 // If we consumed everything but expect more data, reset p_ and cancel since
719 // we can't know if the key was complete or not.
720 if (!finishing_ && p_.empty()) {
721 p_ = original;
722 return util::Status::CANCELLED;
723 }
724 // Since we aren't using the key storage, clear it out.
725 key_storage_.clear();
726 return util::Status::OK;
727}
728
729JsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() {
730 SkipWhitespace();
731
732 int size = p_.size();
733 if (size == 0) {
734 // If we ran out of data, report unknown and we'll place the previous parse
735 // type onto the stack and try again when we have more data.
736 return UNKNOWN;
737 }
738 // TODO(sven): Split this method based on context since different contexts
739 // support different tokens. Would slightly speed up processing?
740 const char* data = p_.data();
741 if (*data == '\"' || *data == '\'') return BEGIN_STRING;
742 if (*data == '-' || ('0' <= *data && *data <= '9')) {
743 return BEGIN_NUMBER;
744 }
745 if (size >= true_len && !strncmp(data, "true", true_len)) {
746 return BEGIN_TRUE;
747 }
748 if (size >= false_len && !strncmp(data, "false", false_len)) {
749 return BEGIN_FALSE;
750 }
751 if (size >= null_len && !strncmp(data, "null", null_len)) {
752 return BEGIN_NULL;
753 }
754 if (*data == '{') return BEGIN_OBJECT;
755 if (*data == '}') return END_OBJECT;
756 if (*data == '[') return BEGIN_ARRAY;
757 if (*data == ']') return END_ARRAY;
758 if (*data == ':') return ENTRY_SEPARATOR;
759 if (*data == ',') return VALUE_SEPARATOR;
760 if (MatchKey(p_)) {
761 return BEGIN_KEY;
762 }
763
764 // We don't know that we necessarily have an invalid token here, just that we
765 // can't parse what we have so far. So we don't report an error and just
766 // return UNKNOWN so we can try again later when we have more data, or if we
767 // finish and we have leftovers.
768 return UNKNOWN;
769}
770
771} // namespace converter
772} // namespace util
773} // namespace protobuf
774} // namespace google