Blame - src/google/protobuf/util/internal/json_stream_parser.cc - platform/external/protobuf-javalite

blob: df916751386848cb27cac043b9f97651c0a9dc85 [file] [log] [blame]

Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	1	// Protocol Buffers - Google's data interchange format
				2	// Copyright 2008 Google Inc. All rights reserved.
				3	// https://developers.google.com/protocol-buffers/
				4	//
				5	// Redistribution and use in source and binary forms, with or without
				6	// modification, are permitted provided that the following conditions are
				7	// met:
				8	//
				9	// * Redistributions of source code must retain the above copyright
				10	// notice, this list of conditions and the following disclaimer.
				11	// * Redistributions in binary form must reproduce the above
				12	// copyright notice, this list of conditions and the following disclaimer
				13	// in the documentation and/or other materials provided with the
				14	// distribution.
				15	// * Neither the name of Google Inc. nor the names of its
				16	// contributors may be used to endorse or promote products derived from
				17	// this software without specific prior written permission.
				18	//
				19	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				30
				31	#include <google/protobuf/util/internal/json_stream_parser.h>
				32
				33	#include <algorithm>
				34	#include <cctype>
				35	#include <cerrno>
				36	#include <cstdlib>
				37	#include <cstring>
				38	#include <memory>
				39	#ifndef _SHARED_PTR_H
				40	#include <google/protobuf/stubs/shared_ptr.h>
				41	#endif
				42
Feng Xiao	eee38b0	2015-08-22 18:25:48 -0700	[diff] [blame]	43	#include <google/protobuf/stubs/logging.h>
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	44	#include <google/protobuf/stubs/common.h>
				45	#include <google/protobuf/stubs/strutil.h>
				46	#include <google/protobuf/util/internal/object_writer.h>
				47
				48	namespace google {
				49	namespace protobuf {
				50	namespace util {
				51
				52	// Allow these symbols to be referenced as util::Status, util::error::* in
				53	// this file.
				54	using util::Status;
				55	namespace error {
				56	using util::error::INTERNAL;
				57	using util::error::INVALID_ARGUMENT;
				58	} // namespace error
				59
				60	namespace converter {
				61
				62	// Number of digits in a unicode escape sequence (/uXXXX)
				63	static const int kUnicodeEscapedLength = 6;
				64
				65	// Length of the true, false, and null literals.
				66	static const int true_len = strlen("true");
				67	static const int false_len = strlen("false");
				68	static const int null_len = strlen("null");
				69
				70	inline bool IsLetter(char c) {
				71	return ('a' <= c && c <= 'z') \|\| ('A' <= c && c <= 'Z') \|\| (c == '_') \|\|
				72	(c == '$');
				73	}
				74
				75	inline bool IsAlphanumeric(char c) {
				76	return IsLetter(c) \|\| ('0' <= c && c <= '9');
				77	}
				78
				79	static bool ConsumeKey(StringPiece* input, StringPiece* key) {
				80	if (input->empty() \|\| !IsLetter((*input)[0])) return false;
				81	int len = 1;
				82	for (; len < input->size(); ++len) {
				83	if (!IsAlphanumeric((*input)[len])) {
				84	break;
				85	}
				86	}
				87	*key = StringPiece(input->data(), len);
				88	*input = StringPiece(input->data() + len, input->size() - len);
				89	return true;
				90	}
				91
				92	static bool MatchKey(StringPiece input) {
				93	return !input.empty() && IsLetter(input[0]);
				94	}
				95
				96	JsonStreamParser::JsonStreamParser(ObjectWriter* ow)
				97	: ow_(ow),
				98	stack_(),
				99	leftover_(),
				100	json_(),
				101	p_(),
				102	key_(),
				103	key_storage_(),
				104	finishing_(false),
				105	parsed_(),
				106	parsed_storage_(),
				107	string_open_(0),
Feng Xiao	eee38b0	2015-08-22 18:25:48 -0700	[diff] [blame]	108	chunk_storage_(),
				109	coerce_to_utf8_(false) {
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	110	// Initialize the stack with a single value to be parsed.
				111	stack_.push(VALUE);
				112	}
				113
				114	JsonStreamParser::~JsonStreamParser() {}
				115
Feng Xiao	eee38b0	2015-08-22 18:25:48 -0700	[diff] [blame]	116
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	117	util::Status JsonStreamParser::Parse(StringPiece json) {
Feng Xiao	eee38b0	2015-08-22 18:25:48 -0700	[diff] [blame]	118	StringPiece chunk = json;
				119	// If we have leftovers from a previous chunk, append the new chunk to it
				120	// and create a new StringPiece pointing at the string's data. This could
				121	// be large but we rely on the chunks to be small, assuming they are
				122	// fragments of a Cord.
				123	if (!leftover_.empty()) {
				124	// Don't point chunk to leftover_ because leftover_ will be updated in
				125	// ParseChunk(chunk).
				126	chunk_storage_.swap(leftover_);
				127	json.AppendToString(&chunk_storage_);
				128	chunk = StringPiece(chunk_storage_);
				129	}
				130
				131	// Find the structurally valid UTF8 prefix and parse only that.
				132	int n = internal::UTF8SpnStructurallyValid(chunk);
				133	if (n > 0) {
				134	util::Status status = ParseChunk(chunk.substr(0, n));
				135
				136	// Any leftover characters are stashed in leftover_ for later parsing when
				137	// there is more data available.
				138	chunk.substr(n).AppendToString(&leftover_);
				139	return status;
				140	} else {
				141	chunk.CopyToString(&leftover_);
				142	return util::Status::OK;
				143	}
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	144	}
				145
				146	util::Status JsonStreamParser::FinishParse() {
				147	// If we do not expect anything and there is nothing left to parse we're all
				148	// done.
				149	if (stack_.empty() && leftover_.empty()) {
				150	return util::Status::OK;
				151	}
Feng Xiao	eee38b0	2015-08-22 18:25:48 -0700	[diff] [blame]	152
				153	// Storage for UTF8-coerced string.
				154	google::protobuf::scoped_array<char> utf8;
				155	if (coerce_to_utf8_) {
				156	utf8.reset(new char[leftover_.size()]);
				157	char* coerced = internal::UTF8CoerceToStructurallyValid(leftover_, utf8.get(), ' ');
				158	p_ = json_ = StringPiece(coerced, leftover_.size());
				159	} else {
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	160	p_ = json_ = leftover_;
Feng Xiao	eee38b0	2015-08-22 18:25:48 -0700	[diff] [blame]	161	if (!internal::IsStructurallyValidUTF8(leftover_)) {
				162	return ReportFailure("Encountered non UTF-8 code points.");
				163	}
Feng Xiao	eee38b0	2015-08-22 18:25:48 -0700	[diff] [blame]	164	}
				165
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	166	// Parse the remainder in finishing mode, which reports errors for things like
				167	// unterminated strings or unknown tokens that would normally be retried.
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	168	finishing_ = true;
				169	util::Status result = RunParser();
				170	if (result.ok()) {
				171	SkipWhitespace();
				172	if (!p_.empty()) {
				173	result = ReportFailure("Parsing terminated before end of input.");
				174	}
				175	}
				176	return result;
				177	}
				178
				179	util::Status JsonStreamParser::ParseChunk(StringPiece chunk) {
Feng Xiao	eee38b0	2015-08-22 18:25:48 -0700	[diff] [blame]	180	// Do not do any work if the chunk is empty.
				181	if (chunk.empty()) return util::Status::OK;
				182
				183	p_ = json_ = chunk;
Feng Xiao	e96ff30	2015-06-15 18:21:48 -0700	[diff] [blame]	184
				185	finishing_ = false;
				186	util::Status result = RunParser();
				187	if (!result.ok()) return result;
				188
				189	SkipWhitespace();
				190	if (p_.empty()) {
				191	// If we parsed everything we had, clear the leftover.
				192	leftover_.clear();
				193	} else {
				194	// If we do not expect anything i.e. stack is empty, and we have non-empty
				195	// string left to parse, we report an error.
				196	if (stack_.empty()) {
				197	return ReportFailure("Parsing terminated before end of input.");
				198	}
				199	// If we expect future data i.e. stack is non-empty, and we have some
				200	// unparsed data left, we save it for later parse.
				201	leftover_ = p_.ToString();
				202	}
				203	return util::Status::OK;
				204	}
				205
				206	util::Status JsonStreamParser::RunParser() {
				207	while (!stack_.empty()) {
				208	ParseType type = stack_.top();
				209	TokenType t = (string_open_ == 0) ? GetNextTokenType() : BEGIN_STRING;
				210	stack_.pop();
				211	util::Status result;
				212	switch (type) {
				213	case VALUE:
				214	result = ParseValue(t);
				215	break;
				216
				217	case OBJ_MID:
				218	result = ParseObjectMid(t);
				219	break;
				220
				221	case ENTRY:
				222	result = ParseEntry(t);
				223	break;
				224
				225	case ENTRY_MID:
				226	result = ParseEntryMid(t);
				227	break;
				228
				229	case ARRAY_VALUE:
				230	result = ParseArrayValue(t);
				231	break;
				232
				233	case ARRAY_MID:
				234	result = ParseArrayMid(t);
				235	break;
				236
				237	default:
				238	result = util::Status(util::error::INTERNAL,
				239	StrCat("Unknown parse type: ", type));
				240	break;
				241	}
				242	if (!result.ok()) {
				243	// If we were cancelled, save our state and try again later.
				244	if (!finishing_ && result == util::Status::CANCELLED) {
				245	stack_.push(type);
				246	// If we have a key we still need to render, make sure to save off the
				247	// contents in our own storage.
				248	if (!key_.empty() && key_storage_.empty()) {
				249	key_.AppendToString(&key_storage_);
				250	key_ = StringPiece(key_storage_);
				251	}
				252	result = util::Status::OK;
				253	}
				254	return result;
				255	}
				256	}
				257	return util::Status::OK;
				258	}
				259
				260	util::Status JsonStreamParser::ParseValue(TokenType type) {
				261	switch (type) {
				262	case BEGIN_OBJECT:
				263	return HandleBeginObject();
				264	case BEGIN_ARRAY:
				265	return HandleBeginArray();
				266	case BEGIN_STRING:
				267	return ParseString();
				268	case BEGIN_NUMBER:
				269	return ParseNumber();
				270	case BEGIN_TRUE:
				271	return ParseTrue();
				272	case BEGIN_FALSE:
				273	return ParseFalse();
				274	case BEGIN_NULL:
				275	return ParseNull();
				276	case UNKNOWN:
				277	return ReportUnknown("Expected a value.");
				278	default: {
				279	// Special case for having been cut off while parsing, wait for more data.
				280	// This handles things like 'fals' being at the end of the string, we
				281	// don't know if the next char would be e, completing it, or something
				282	// else, making it invalid.
				283	if (!finishing_ && p_.length() < false_len) {
				284	return util::Status::CANCELLED;
				285	}
				286	return ReportFailure("Unexpected token.");
				287	}
				288	}
				289	}
				290
				291	util::Status JsonStreamParser::ParseString() {
				292	util::Status result = ParseStringHelper();
				293	if (result.ok()) {
				294	ow_->RenderString(key_, parsed_);
				295	key_.clear();
				296	parsed_.clear();
				297	parsed_storage_.clear();
				298	}
				299	return result;
				300	}
				301
				302	util::Status JsonStreamParser::ParseStringHelper() {
				303	// If we haven't seen the start quote, grab it and remember it for later.
				304	if (string_open_ == 0) {
				305	string_open_ = *p_.data();
				306	GOOGLE_DCHECK(string_open_ == '\"' \|\| string_open_ == '\'');
				307	Advance();
				308	}
				309	// Track where we last copied data from so we can minimize copying.
				310	const char* last = p_.data();
				311	while (!p_.empty()) {
				312	const char* data = p_.data();
				313	if (*data == '\\') {
				314	// We're about to handle an escape, copy all bytes from last to data.
				315	if (last < data) {
				316	parsed_storage_.append(last, data - last);
				317	last = data;
				318	}
				319	// If we ran out of string after the \, cancel or report an error
				320	// depending on if we expect more data later.
				321	if (p_.length() == 1) {
				322	if (!finishing_) {
				323	return util::Status::CANCELLED;
				324	}
				325	return ReportFailure("Closing quote expected in string.");
				326	}
				327	// Parse a unicode escape if we found \u in the string.
				328	if (data[1] == 'u') {
				329	util::Status result = ParseUnicodeEscape();
				330	if (!result.ok()) {
				331	return result;
				332	}
				333	// Move last pointer past the unicode escape and continue.
				334	last = p_.data();
				335	continue;
				336	}
				337	// Handle the standard set of backslash-escaped characters.
				338	switch (data[1]) {
				339	case 'b':
				340	parsed_storage_.push_back('\b');
				341	break;
				342	case 'f':
				343	parsed_storage_.push_back('\f');
				344	break;
				345	case 'n':
				346	parsed_storage_.push_back('\n');
				347	break;
				348	case 'r':
				349	parsed_storage_.push_back('\r');
				350	break;
				351	case 't':
				352	parsed_storage_.push_back('\t');
				353	break;
				354	case 'v':
				355	parsed_storage_.push_back('\v');
				356	break;
				357	default:
				358	parsed_storage_.push_back(data[1]);
				359	}
				360	// We handled two characters, so advance past them and continue.
				361	p_.remove_prefix(2);
				362	last = p_.data();
				363	continue;
				364	}
				365	// If we found the closing quote note it, advance past it, and return.
				366	if (*data == string_open_) {
				367	// If we didn't copy anything, reuse the input buffer.
				368	if (parsed_storage_.empty()) {
				369	parsed_ = StringPiece(last, data - last);
				370	} else {
				371	if (last < data) {
				372	parsed_storage_.append(last, data - last);
				373	last = data;
				374	}
				375	parsed_ = StringPiece(parsed_storage_);
				376	}
				377	// Clear the quote char so next time we try to parse a string we'll
				378	// start fresh.
				379	string_open_ = 0;
				380	Advance();
				381	return util::Status::OK;
				382	}
				383	// Normal character, just advance past it.
				384	Advance();
				385	}
				386	// If we ran out of characters, copy over what we have so far.
				387	if (last < p_.data()) {
				388	parsed_storage_.append(last, p_.data() - last);
				389	}
				390	// If we didn't find the closing quote but we expect more data, cancel for now
				391	if (!finishing_) {
				392	return util::Status::CANCELLED;
				393	}
				394	// End of string reached without a closing quote, report an error.
				395	string_open_ = 0;
				396	return ReportFailure("Closing quote expected in string.");
				397	}
				398
				399	// Converts a unicode escaped character to a decimal value stored in a char32
				400	// for use in UTF8 encoding utility. We assume that str begins with \uhhhh and
				401	// convert that from the hex number to a decimal value.
				402	//
				403	// There are some security exploits with UTF-8 that we should be careful of:
				404	// - http://www.unicode.org/reports/tr36/#UTF-8_Exploit
				405	// - http://sites/intl-eng/design-guide/core-application
				406	util::Status JsonStreamParser::ParseUnicodeEscape() {
				407	if (p_.length() < kUnicodeEscapedLength) {
				408	if (!finishing_) {
				409	return util::Status::CANCELLED;
				410	}
				411	return ReportFailure("Illegal hex string.");
				412	}
				413	GOOGLE_DCHECK_EQ('\\', p_.data()[0]);
				414	GOOGLE_DCHECK_EQ('u', p_.data()[1]);
				415	uint32 code = 0;
				416	for (int i = 2; i < kUnicodeEscapedLength; ++i) {
				417	if (!isxdigit(p_.data()[i])) {
				418	return ReportFailure("Invalid escape sequence.");
				419	}
				420	code = (code << 4) + hex_digit_to_int(p_.data()[i]);
				421	}
				422	char buf[UTFmax];
				423	int len = EncodeAsUTF8Char(code, buf);
				424	// Advance past the unicode escape.
				425	p_.remove_prefix(kUnicodeEscapedLength);
				426	parsed_storage_.append(buf, len);
				427	return util::Status::OK;
				428	}
				429
				430	util::Status JsonStreamParser::ParseNumber() {
				431	NumberResult number;
				432	util::Status result = ParseNumberHelper(&number);
				433	if (result.ok()) {
				434	switch (number.type) {
				435	case NumberResult::DOUBLE:
				436	ow_->RenderDouble(key_, number.double_val);
				437	key_.clear();
				438	break;
				439
				440	case NumberResult::INT:
				441	ow_->RenderInt64(key_, number.int_val);
				442	key_.clear();
				443	break;
				444
				445	case NumberResult::UINT:
				446	ow_->RenderUint64(key_, number.uint_val);
				447	key_.clear();
				448	break;
				449
				450	default:
				451	return ReportFailure("Unable to parse number.");
				452	}
				453	}
				454	return result;
				455	}
				456
				457	util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
				458	const char* data = p_.data();
				459	int length = p_.length();
				460
				461	// Look for the first non-numeric character, or the end of the string.
				462	int index = 0;
				463	bool floating = false;
				464	bool negative = data[index] == '-';
				465	// Find the first character that cannot be part of the number. Along the way
				466	// detect if the number needs to be parsed as a double.
				467	// Note that this restricts numbers to the JSON specification, so for example
				468	// we do not support hex or octal notations.
				469	for (; index < length; ++index) {
				470	char c = data[index];
				471	if (isdigit(c)) continue;
				472	if (c == '.' \|\| c == 'e' \|\| c == 'E') {
				473	floating = true;
				474	continue;
				475	}
				476	if (c == '+' \|\| c == '-') continue;
				477	// Not a valid number character, break out.
				478	break;
				479	}
				480
				481	// If the entire input is a valid number, and we may have more content in the
				482	// future, we abort for now and resume when we know more.
				483	if (index == length && !finishing_) {
				484	return util::Status::CANCELLED;
				485	}
				486
				487	// Create a string containing just the number, so we can use safe_strtoX
				488	string number = p_.substr(0, index).ToString();
				489
				490	// Floating point number, parse as a double.
				491	if (floating) {
				492	if (!safe_strtod(number, &result->double_val)) {
				493	return ReportFailure("Unable to parse number.");
				494	}
				495	result->type = NumberResult::DOUBLE;
				496	p_.remove_prefix(index);
				497	return util::Status::OK;
				498	}
				499
				500	// Positive non-floating point number, parse as a uint64.
				501	if (!negative) {
				502	if (!safe_strtou64(number, &result->uint_val)) {
				503	return ReportFailure("Unable to parse number.");
				504	}
				505	result->type = NumberResult::UINT;
				506	p_.remove_prefix(index);
				507	return util::Status::OK;
				508	}
				509
				510	// Negative non-floating point number, parse as an int64.
				511	if (!safe_strto64(number, &result->int_val)) {
				512	return ReportFailure("Unable to parse number.");
				513	}
				514	result->type = NumberResult::INT;
				515	p_.remove_prefix(index);
				516	return util::Status::OK;
				517	}
				518
				519	util::Status JsonStreamParser::HandleBeginObject() {
				520	GOOGLE_DCHECK_EQ('{', *p_.data());
				521	Advance();
				522	ow_->StartObject(key_);
				523	key_.clear();
				524	stack_.push(ENTRY);
				525	return util::Status::OK;
				526	}
				527
				528	util::Status JsonStreamParser::ParseObjectMid(TokenType type) {
				529	if (type == UNKNOWN) {
				530	return ReportUnknown("Expected , or } after key:value pair.");
				531	}
				532
				533	// Object is complete, advance past the comma and render the EndObject.
				534	if (type == END_OBJECT) {
				535	Advance();
				536	ow_->EndObject();
				537	return util::Status::OK;
				538	}
				539	// Found a comma, advance past it and get ready for an entry.
				540	if (type == VALUE_SEPARATOR) {
				541	Advance();
				542	stack_.push(ENTRY);
				543	return util::Status::OK;
				544	}
				545	// Illegal token after key:value pair.
				546	return ReportFailure("Expected , or } after key:value pair.");
				547	}
				548
				549	util::Status JsonStreamParser::ParseEntry(TokenType type) {
				550	if (type == UNKNOWN) {
				551	return ReportUnknown("Expected an object key or }.");
				552	}
				553
				554	// Close the object and return. This allows for trailing commas.
				555	if (type == END_OBJECT) {
				556	ow_->EndObject();
				557	Advance();
				558	return util::Status::OK;
				559	}
				560
				561	util::Status result;
				562	if (type == BEGIN_STRING) {
				563	// Key is a string (standard JSON), parse it and store the string.
				564	result = ParseStringHelper();
				565	if (result.ok()) {
				566	key_storage_.clear();
				567	if (!parsed_storage_.empty()) {
				568	parsed_storage_.swap(key_storage_);
				569	key_ = StringPiece(key_storage_);
				570	} else {
				571	key_ = parsed_;
				572	}
				573	parsed_.clear();
				574	}
				575	} else if (type == BEGIN_KEY) {
				576	// Key is a bare key (back compat), create a StringPiece pointing to it.
				577	result = ParseKey();
				578	} else {
				579	// Unknown key type, report an error.
				580	result = ReportFailure("Expected an object key or }.");
				581	}
				582	// On success we next expect an entry mid ':' then an object mid ',' or '}'
				583	if (result.ok()) {
				584	stack_.push(OBJ_MID);
				585	stack_.push(ENTRY_MID);
				586	}
				587	return result;
				588	}
				589
				590	util::Status JsonStreamParser::ParseEntryMid(TokenType type) {
				591	if (type == UNKNOWN) {
				592	return ReportUnknown("Expected : between key:value pair.");
				593	}
				594	if (type == ENTRY_SEPARATOR) {
				595	Advance();
				596	stack_.push(VALUE);
				597	return util::Status::OK;
				598	}
				599	return ReportFailure("Expected : between key:value pair.");
				600	}
				601
				602	util::Status JsonStreamParser::HandleBeginArray() {
				603	GOOGLE_DCHECK_EQ('[', *p_.data());
				604	Advance();
				605	ow_->StartList(key_);
				606	key_.clear();
				607	stack_.push(ARRAY_VALUE);
				608	return util::Status::OK;
				609	}
				610
				611	util::Status JsonStreamParser::ParseArrayValue(TokenType type) {
				612	if (type == UNKNOWN) {
				613	return ReportUnknown("Expected a value or ] within an array.");
				614	}
				615
				616	if (type == END_ARRAY) {
				617	ow_->EndList();
				618	Advance();
				619	return util::Status::OK;
				620	}
				621
				622	// The ParseValue call may push something onto the stack so we need to make
				623	// sure an ARRAY_MID is after it, so we push it on now.
				624	stack_.push(ARRAY_MID);
				625	util::Status result = ParseValue(type);
				626	if (result == util::Status::CANCELLED) {
				627	// If we were cancelled, pop back off the ARRAY_MID so we don't try to
				628	// push it on again when we try over.
				629	stack_.pop();
				630	}
				631	return result;
				632	}
				633
				634	util::Status JsonStreamParser::ParseArrayMid(TokenType type) {
				635	if (type == UNKNOWN) {
				636	return ReportUnknown("Expected , or ] after array value.");
				637	}
				638
				639	if (type == END_ARRAY) {
				640	ow_->EndList();
				641	Advance();
				642	return util::Status::OK;
				643	}
				644
				645	// Found a comma, advance past it and expect an array value next.
				646	if (type == VALUE_SEPARATOR) {
				647	Advance();
				648	stack_.push(ARRAY_VALUE);
				649	return util::Status::OK;
				650	}
				651	// Illegal token after array value.
				652	return ReportFailure("Expected , or ] after array value.");
				653	}
				654
				655	util::Status JsonStreamParser::ParseTrue() {
				656	ow_->RenderBool(key_, true);
				657	key_.clear();
				658	p_.remove_prefix(true_len);
				659	return util::Status::OK;
				660	}
				661
				662	util::Status JsonStreamParser::ParseFalse() {
				663	ow_->RenderBool(key_, false);
				664	key_.clear();
				665	p_.remove_prefix(false_len);
				666	return util::Status::OK;
				667	}
				668
				669	util::Status JsonStreamParser::ParseNull() {
				670	ow_->RenderNull(key_);
				671	key_.clear();
				672	p_.remove_prefix(null_len);
				673	return util::Status::OK;
				674	}
				675
				676	util::Status JsonStreamParser::ReportFailure(StringPiece message) {
				677	static const int kContextLength = 20;
				678	const char* p_start = p_.data();
				679	const char* json_start = json_.data();
				680	const char* begin = max(p_start - kContextLength, json_start);
				681	const char* end = min(p_start + kContextLength, json_start + json_.size());
				682	StringPiece segment(begin, end - begin);
				683	string location(p_start - begin, ' ');
				684	location.push_back('^');
				685	return util::Status(util::error::INVALID_ARGUMENT,
				686	StrCat(message, "\n", segment, "\n", location));
				687	}
				688
				689	util::Status JsonStreamParser::ReportUnknown(StringPiece message) {
				690	// If we aren't finishing the parse, cancel parsing and try later.
				691	if (!finishing_) {
				692	return util::Status::CANCELLED;
				693	}
				694	if (p_.empty()) {
				695	return ReportFailure(StrCat("Unexpected end of string. ", message));
				696	}
				697	return ReportFailure(message);
				698	}
				699
				700	void JsonStreamParser::SkipWhitespace() {
				701	while (!p_.empty() && ascii_isspace(*p_.data())) {
				702	Advance();
				703	}
				704	}
				705
				706	void JsonStreamParser::Advance() {
				707	// Advance by moving one UTF8 character while making sure we don't go beyond
				708	// the length of StringPiece.
				709	p_.remove_prefix(
				710	min<int>(p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length())));
				711	}
				712
				713	util::Status JsonStreamParser::ParseKey() {
				714	StringPiece original = p_;
				715	if (!ConsumeKey(&p_, &key_)) {
				716	return ReportFailure("Invalid key or variable name.");
				717	}
				718	// If we consumed everything but expect more data, reset p_ and cancel since
				719	// we can't know if the key was complete or not.
				720	if (!finishing_ && p_.empty()) {
				721	p_ = original;
				722	return util::Status::CANCELLED;
				723	}
				724	// Since we aren't using the key storage, clear it out.
				725	key_storage_.clear();
				726	return util::Status::OK;
				727	}
				728
				729	JsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() {
				730	SkipWhitespace();
				731
				732	int size = p_.size();
				733	if (size == 0) {
				734	// If we ran out of data, report unknown and we'll place the previous parse
				735	// type onto the stack and try again when we have more data.
				736	return UNKNOWN;
				737	}
				738	// TODO(sven): Split this method based on context since different contexts
				739	// support different tokens. Would slightly speed up processing?
				740	const char* data = p_.data();
				741	if (data == '\"' \|\| data == '\'') return BEGIN_STRING;
				742	if (data == '-' \|\| ('0' <= data && *data <= '9')) {
				743	return BEGIN_NUMBER;
				744	}
				745	if (size >= true_len && !strncmp(data, "true", true_len)) {
				746	return BEGIN_TRUE;
				747	}
				748	if (size >= false_len && !strncmp(data, "false", false_len)) {
				749	return BEGIN_FALSE;
				750	}
				751	if (size >= null_len && !strncmp(data, "null", null_len)) {
				752	return BEGIN_NULL;
				753	}
				754	if (*data == '{') return BEGIN_OBJECT;
				755	if (*data == '}') return END_OBJECT;
				756	if (*data == '[') return BEGIN_ARRAY;
				757	if (*data == ']') return END_ARRAY;
				758	if (*data == ':') return ENTRY_SEPARATOR;
				759	if (*data == ',') return VALUE_SEPARATOR;
				760	if (MatchKey(p_)) {
				761	return BEGIN_KEY;
				762	}
				763
				764	// We don't know that we necessarily have an invalid token here, just that we
				765	// can't parse what we have so far. So we don't report an error and just
				766	// return UNKNOWN so we can try again later when we have more data, or if we
				767	// finish and we have leftovers.
				768	return UNKNOWN;
				769	}
				770
				771	} // namespace converter
				772	} // namespace util
				773	} // namespace protobuf
				774	} // namespace google