blob: c5111f8d708653bddbff6cc5e1c76b56d6159ae5 [file] [log] [blame]
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -04001// Copyright 2007-2011 Baptiste Lepilleur
2// Distributed under MIT license, or public domain if desired and
3// recognized in your jurisdiction.
4// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
5
6#if !defined(JSON_IS_AMALGAMATION)
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -05007#include <json/assertions.h>
8#include <json/reader.h>
9#include <json/value.h>
10#include "json_tool.h"
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040011#endif // if !defined(JSON_IS_AMALGAMATION)
#include <utility>
#include <cstdio>
#include <cassert>
#include <cstring>
#include <istream>
#include <iterator>
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040017
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -050018#if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below
19#define snprintf _snprintf
20#endif
21
22#if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0
23// Disable warning about strdup being deprecated.
24#pragma warning(disable : 4996)
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040025#endif
26
27namespace Json {
28
29// Implementation of class Features
30// ////////////////////////////////
31
32Features::Features()
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -050033 : allowComments_(true), strictRoot_(false),
34 allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040035
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -050036Features Features::all() { return Features(); }
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040037
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -050038Features Features::strictMode() {
39 Features features;
40 features.allowComments_ = false;
41 features.strictRoot_ = true;
42 features.allowDroppedNullPlaceholders_ = false;
43 features.allowNumericKeys_ = false;
44 return features;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040045}
46
47// Implementation of class Reader
48// ////////////////////////////////
49
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -050050static inline bool in(Reader::Char c,
51 Reader::Char c1,
52 Reader::Char c2,
53 Reader::Char c3,
54 Reader::Char c4) {
55 return c == c1 || c == c2 || c == c3 || c == c4;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040056}
57
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -050058static inline bool in(Reader::Char c,
59 Reader::Char c1,
60 Reader::Char c2,
61 Reader::Char c3,
62 Reader::Char c4,
63 Reader::Char c5) {
64 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040065}
66
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -050067static bool containsNewLine(Reader::Location begin, Reader::Location end) {
68 for (; begin < end; ++begin)
69 if (*begin == '\n' || *begin == '\r')
70 return true;
71 return false;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040072}
73
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040074// Class Reader
75// //////////////////////////////////////////////////////////////////
76
77Reader::Reader()
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -050078 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
79 lastValue_(), commentsBefore_(), features_(Features::all()),
80 collectComments_() {}
81
82Reader::Reader(const Features& features)
83 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
84 lastValue_(), commentsBefore_(), features_(features), collectComments_() {
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040085}
86
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040087bool
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -050088Reader::parse(const std::string& document, Value& root, bool collectComments) {
89 document_ = document;
90 const char* begin = document_.c_str();
91 const char* end = begin + document_.length();
92 return parse(begin, end, root, collectComments);
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -040093}
94
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -050095bool Reader::parse(std::istream& sin, Value& root, bool collectComments) {
96 // std::istream_iterator<char> begin(sin);
97 // std::istream_iterator<char> end;
98 // Those would allow streamed input from a file, if parse() were a
99 // template function.
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400100
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500101 // Since std::string is reference-counted, this at least does not
102 // create an extra copy.
103 std::string doc;
104 std::getline(sin, doc, (char)EOF);
105 return parse(doc, root, collectComments);
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400106}
107
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500108bool Reader::parse(const char* beginDoc,
109 const char* endDoc,
110 Value& root,
111 bool collectComments) {
112 if (!features_.allowComments_) {
113 collectComments = false;
114 }
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400115
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500116 begin_ = beginDoc;
117 end_ = endDoc;
118 collectComments_ = collectComments;
119 current_ = begin_;
120 lastValueEnd_ = 0;
121 lastValue_ = 0;
122 commentsBefore_ = "";
123 errors_.clear();
124 while (!nodes_.empty())
125 nodes_.pop();
126 nodes_.push(&root);
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400127
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500128 bool successful = readValue();
129 Token token;
130 skipCommentTokens(token);
131 if (collectComments_ && !commentsBefore_.empty())
132 root.setComment(commentsBefore_, commentAfter);
133 if (features_.strictRoot_) {
134 if (!root.isArray() && !root.isObject()) {
135 // Set error location to start of doc, ideally should be first token found
136 // in doc
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400137 token.type_ = tokenError;
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500138 token.start_ = beginDoc;
139 token.end_ = endDoc;
140 addError(
141 "A valid JSON document must be either an array or an object value.",
142 token);
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400143 return false;
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500144 }
145 }
146 return successful;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400147}
148
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500149bool Reader::readValue() {
150 Token token;
151 skipCommentTokens(token);
152 bool successful = true;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400153
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500154 if (collectComments_ && !commentsBefore_.empty()) {
155 // Remove newline characters at the end of the comments
156 size_t lastNonNewline = commentsBefore_.find_last_not_of("\r\n");
157 if (lastNonNewline != std::string::npos) {
158 commentsBefore_.erase(lastNonNewline + 1);
159 } else {
160 commentsBefore_.clear();
161 }
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400162
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500163 currentValue().setComment(commentsBefore_, commentBefore);
164 commentsBefore_ = "";
165 }
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400166
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500167 switch (token.type_) {
168 case tokenObjectBegin:
169 successful = readObject(token);
170 currentValue().setOffsetLimit(current_ - begin_);
171 break;
172 case tokenArrayBegin:
173 successful = readArray(token);
174 currentValue().setOffsetLimit(current_ - begin_);
175 break;
176 case tokenNumber:
177 successful = decodeNumber(token);
178 break;
179 case tokenString:
180 successful = decodeString(token);
181 break;
182 case tokenTrue:
183 currentValue() = true;
184 currentValue().setOffsetStart(token.start_ - begin_);
185 currentValue().setOffsetLimit(token.end_ - begin_);
186 break;
187 case tokenFalse:
188 currentValue() = false;
189 currentValue().setOffsetStart(token.start_ - begin_);
190 currentValue().setOffsetLimit(token.end_ - begin_);
191 break;
192 case tokenNull:
193 currentValue() = Value();
194 currentValue().setOffsetStart(token.start_ - begin_);
195 currentValue().setOffsetLimit(token.end_ - begin_);
196 break;
197 case tokenArraySeparator:
198 if (features_.allowDroppedNullPlaceholders_) {
199 // "Un-read" the current token and mark the current value as a null
200 // token.
201 current_--;
202 currentValue() = Value();
203 currentValue().setOffsetStart(current_ - begin_ - 1);
204 currentValue().setOffsetLimit(current_ - begin_);
205 break;
206 }
207 // Else, fall through...
208 default:
209 currentValue().setOffsetStart(token.start_ - begin_);
210 currentValue().setOffsetLimit(token.end_ - begin_);
211 return addError("Syntax error: value, object or array expected.", token);
212 }
213
214 if (collectComments_) {
215 lastValueEnd_ = current_;
216 lastValue_ = &currentValue();
217 }
218
219 return successful;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400220}
221
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500222void Reader::skipCommentTokens(Token& token) {
223 if (features_.allowComments_) {
224 do {
225 readToken(token);
226 } while (token.type_ == tokenComment);
227 } else {
228 readToken(token);
229 }
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400230}
231
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500232bool Reader::expectToken(TokenType type, Token& token, const char* message) {
233 readToken(token);
234 if (token.type_ != type)
235 return addError(message, token);
236 return true;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400237}
238
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500239bool Reader::readToken(Token& token) {
240 skipSpaces();
241 token.start_ = current_;
242 Char c = getNextChar();
243 bool ok = true;
244 switch (c) {
245 case '{':
246 token.type_ = tokenObjectBegin;
247 break;
248 case '}':
249 token.type_ = tokenObjectEnd;
250 break;
251 case '[':
252 token.type_ = tokenArrayBegin;
253 break;
254 case ']':
255 token.type_ = tokenArrayEnd;
256 break;
257 case '"':
258 token.type_ = tokenString;
259 ok = readString();
260 break;
261 case '/':
262 token.type_ = tokenComment;
263 ok = readComment();
264 break;
265 case '0':
266 case '1':
267 case '2':
268 case '3':
269 case '4':
270 case '5':
271 case '6':
272 case '7':
273 case '8':
274 case '9':
275 case '-':
276 token.type_ = tokenNumber;
277 readNumber();
278 break;
279 case 't':
280 token.type_ = tokenTrue;
281 ok = match("rue", 3);
282 break;
283 case 'f':
284 token.type_ = tokenFalse;
285 ok = match("alse", 4);
286 break;
287 case 'n':
288 token.type_ = tokenNull;
289 ok = match("ull", 3);
290 break;
291 case ',':
292 token.type_ = tokenArraySeparator;
293 break;
294 case ':':
295 token.type_ = tokenMemberSeparator;
296 break;
297 case 0:
298 token.type_ = tokenEndOfStream;
299 break;
300 default:
301 ok = false;
302 break;
303 }
304 if (!ok)
305 token.type_ = tokenError;
306 token.end_ = current_;
307 return true;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400308}
309
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500310void Reader::skipSpaces() {
311 while (current_ != end_) {
312 Char c = *current_;
313 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400314 ++current_;
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500315 else
316 break;
317 }
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400318}
319
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500320bool Reader::match(Location pattern, int patternLength) {
321 if (end_ - current_ < patternLength)
322 return false;
323 int index = patternLength;
324 while (index--)
325 if (current_[index] != pattern[index])
326 return false;
327 current_ += patternLength;
328 return true;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400329}
330
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500331bool Reader::readComment() {
332 Location commentBegin = current_ - 1;
333 Char c = getNextChar();
334 bool successful = false;
335 if (c == '*')
336 successful = readCStyleComment();
337 else if (c == '/')
338 successful = readCppStyleComment();
339 if (!successful)
340 return false;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400341
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500342 if (collectComments_) {
343 CommentPlacement placement = commentBefore;
344 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
345 if (c != '*' || !containsNewLine(commentBegin, current_))
346 placement = commentAfterOnSameLine;
347 }
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400348
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500349 addComment(commentBegin, current_, placement);
350 }
351 return true;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400352}
353
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500354void
355Reader::addComment(Location begin, Location end, CommentPlacement placement) {
356 assert(collectComments_);
357 if (placement == commentAfterOnSameLine) {
358 assert(lastValue_ != 0);
359 lastValue_->setComment(std::string(begin, end), placement);
360 } else {
361 commentsBefore_ += std::string(begin, end);
362 }
363}
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400364
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500365bool Reader::readCStyleComment() {
366 while (current_ != end_) {
367 Char c = getNextChar();
368 if (c == '*' && *current_ == '/')
369 break;
370 }
371 return getNextChar() == '/';
372}
373
374bool Reader::readCppStyleComment() {
375 while (current_ != end_) {
376 Char c = getNextChar();
377 if (c == '\r' || c == '\n')
378 break;
379 }
380 return true;
381}
382
383void Reader::readNumber() {
384 while (current_ != end_) {
385 if (!(*current_ >= '0' && *current_ <= '9') &&
386 !in(*current_, '.', 'e', 'E', '+', '-'))
387 break;
388 ++current_;
389 }
390}
391
392bool Reader::readString() {
393 Char c = 0;
394 while (current_ != end_) {
395 c = getNextChar();
396 if (c == '\\')
397 getNextChar();
398 else if (c == '"')
399 break;
400 }
401 return c == '"';
402}
403
404bool Reader::readObject(Token& tokenStart) {
405 Token tokenName;
406 std::string name;
407 currentValue() = Value(objectValue);
408 currentValue().setOffsetStart(tokenStart.start_ - begin_);
409 while (readToken(tokenName)) {
410 bool initialTokenOk = true;
411 while (tokenName.type_ == tokenComment && initialTokenOk)
412 initialTokenOk = readToken(tokenName);
413 if (!initialTokenOk)
414 break;
415 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400416 return true;
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500417 name = "";
418 if (tokenName.type_ == tokenString) {
419 if (!decodeString(tokenName, name))
420 return recoverFromError(tokenObjectEnd);
421 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
422 Value numberName;
423 if (!decodeNumber(tokenName, numberName))
424 return recoverFromError(tokenObjectEnd);
425 name = numberName.asString();
426 } else {
427 break;
428 }
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400429
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500430 Token colon;
431 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
432 return addErrorAndRecover(
433 "Missing ':' after object member name", colon, tokenObjectEnd);
434 }
435 Value& value = currentValue()[name];
436 nodes_.push(&value);
437 bool ok = readValue();
438 nodes_.pop();
439 if (!ok) // error already set
440 return recoverFromError(tokenObjectEnd);
441
442 Token comma;
443 if (!readToken(comma) ||
444 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
445 comma.type_ != tokenComment)) {
446 return addErrorAndRecover(
447 "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
448 }
449 bool finalizeTokenOk = true;
450 while (comma.type_ == tokenComment && finalizeTokenOk)
451 finalizeTokenOk = readToken(comma);
452 if (comma.type_ == tokenObjectEnd)
453 return true;
454 }
455 return addErrorAndRecover(
456 "Missing '}' or object member name", tokenName, tokenObjectEnd);
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400457}
458
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500459bool Reader::readArray(Token& tokenStart) {
460 currentValue() = Value(arrayValue);
461 currentValue().setOffsetStart(tokenStart.start_ - begin_);
462 skipSpaces();
463 if (*current_ == ']') // empty array
464 {
465 Token endArray;
466 readToken(endArray);
467 return true;
468 }
469 int index = 0;
470 for (;;) {
471 Value& value = currentValue()[index++];
472 nodes_.push(&value);
473 bool ok = readValue();
474 nodes_.pop();
475 if (!ok) // error already set
476 return recoverFromError(tokenArrayEnd);
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400477
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500478 Token token;
479 // Accept Comment after last item in the array.
480 ok = readToken(token);
481 while (token.type_ == tokenComment && ok) {
482 ok = readToken(token);
483 }
484 bool badTokenType =
485 (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
486 if (!ok || badTokenType) {
487 return addErrorAndRecover(
488 "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
489 }
490 if (token.type_ == tokenArrayEnd)
491 break;
492 }
493 return true;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400494}
495
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500496bool Reader::decodeNumber(Token& token) {
497 Value decoded;
498 if (!decodeNumber(token, decoded))
499 return false;
500 currentValue() = decoded;
501 currentValue().setOffsetStart(token.start_ - begin_);
502 currentValue().setOffsetLimit(token.end_ - begin_);
503 return true;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400504}
505
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500506bool Reader::decodeNumber(Token& token, Value& decoded) {
507 bool isDouble = false;
508 for (Location inspect = token.start_; inspect != token.end_; ++inspect) {
509 isDouble = isDouble || in(*inspect, '.', 'e', 'E', '+') ||
510 (*inspect == '-' && inspect != token.start_);
511 }
512 if (isDouble)
513 return decodeDouble(token, decoded);
514 // Attempts to parse the number as an integer. If the number is
515 // larger than the maximum supported value of an integer then
516 // we decode the number as a double.
517 Location current = token.start_;
518 bool isNegative = *current == '-';
519 if (isNegative)
520 ++current;
521 Value::LargestUInt maxIntegerValue =
522 isNegative ? Value::LargestUInt(-Value::minLargestInt)
523 : Value::maxLargestUInt;
524 Value::LargestUInt threshold = maxIntegerValue / 10;
525 Value::LargestUInt value = 0;
526 while (current < token.end_) {
527 Char c = *current++;
528 if (c < '0' || c > '9')
529 return addError("'" + std::string(token.start_, token.end_) +
530 "' is not a number.",
531 token);
532 Value::UInt digit(c - '0');
533 if (value >= threshold) {
534 // We've hit or exceeded the max value divided by 10 (rounded down). If
535 // a) we've only just touched the limit, b) this is the last digit, and
536 // c) it's small enough to fit in that rounding delta, we're okay.
537 // Otherwise treat this number as a double to avoid overflow.
538 if (value > threshold || current != token.end_ ||
539 digit > maxIntegerValue % 10) {
540 return decodeDouble(token, decoded);
541 }
542 }
543 value = value * 10 + digit;
544 }
545 if (isNegative)
546 decoded = -Value::LargestInt(value);
547 else if (value <= Value::LargestUInt(Value::maxInt))
548 decoded = Value::LargestInt(value);
549 else
550 decoded = value;
551 return true;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400552}
553
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500554bool Reader::decodeDouble(Token& token) {
555 Value decoded;
556 if (!decodeDouble(token, decoded))
557 return false;
558 currentValue() = decoded;
559 currentValue().setOffsetStart(token.start_ - begin_);
560 currentValue().setOffsetLimit(token.end_ - begin_);
561 return true;
562}
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400563
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500564bool Reader::decodeDouble(Token& token, Value& decoded) {
565 double value = 0;
566 const int bufferSize = 32;
567 int count;
568 int length = int(token.end_ - token.start_);
569
570 // Sanity check to avoid buffer overflow exploits.
571 if (length < 0) {
572 return addError("Unable to parse token length", token);
573 }
574
575 // Avoid using a string constant for the format control string given to
576 // sscanf, as this can cause hard to debug crashes on OS X. See here for more
577 // info:
578 //
579 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
580 char format[] = "%lf";
581
582 if (length <= bufferSize) {
583 Char buffer[bufferSize + 1];
584 memcpy(buffer, token.start_, length);
585 buffer[length] = 0;
586 count = sscanf(buffer, format, &value);
587 } else {
588 std::string buffer(token.start_, token.end_);
589 count = sscanf(buffer.c_str(), format, &value);
590 }
591
592 if (count != 1)
593 return addError("'" + std::string(token.start_, token.end_) +
594 "' is not a number.",
595 token);
596 decoded = value;
597 return true;
598}
599
600bool Reader::decodeString(Token& token) {
601 std::string decoded;
602 if (!decodeString(token, decoded))
603 return false;
604 currentValue() = decoded;
605 currentValue().setOffsetStart(token.start_ - begin_);
606 currentValue().setOffsetLimit(token.end_ - begin_);
607 return true;
608}
609
610bool Reader::decodeString(Token& token, std::string& decoded) {
611 decoded.reserve(token.end_ - token.start_ - 2);
612 Location current = token.start_ + 1; // skip '"'
613 Location end = token.end_ - 1; // do not include '"'
614 while (current != end) {
615 Char c = *current++;
616 if (c == '"')
617 break;
618 else if (c == '\\') {
619 if (current == end)
620 return addError("Empty escape sequence in string", token, current);
621 Char escape = *current++;
622 switch (escape) {
623 case '"':
624 decoded += '"';
625 break;
626 case '/':
627 decoded += '/';
628 break;
629 case '\\':
630 decoded += '\\';
631 break;
632 case 'b':
633 decoded += '\b';
634 break;
635 case 'f':
636 decoded += '\f';
637 break;
638 case 'n':
639 decoded += '\n';
640 break;
641 case 'r':
642 decoded += '\r';
643 break;
644 case 't':
645 decoded += '\t';
646 break;
647 case 'u': {
648 unsigned int unicode;
649 if (!decodeUnicodeCodePoint(token, current, end, unicode))
650 return false;
651 decoded += codePointToUTF8(unicode);
652 } break;
653 default:
654 return addError("Bad escape sequence in string", token, current);
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400655 }
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500656 } else {
657 decoded += c;
658 }
659 }
660 return true;
661}
662
663bool Reader::decodeUnicodeCodePoint(Token& token,
664 Location& current,
665 Location end,
666 unsigned int& unicode) {
667
668 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
669 return false;
670 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
671 // surrogate pairs
672 if (end - current < 6)
673 return addError(
674 "additional six characters expected to parse unicode surrogate pair.",
675 token,
676 current);
677 unsigned int surrogatePair;
678 if (*(current++) == '\\' && *(current++) == 'u') {
679 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
680 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
681 } else
682 return false;
683 } else
684 return addError("expecting another \\u token to begin the second half of "
685 "a unicode surrogate pair",
686 token,
687 current);
688 }
689 return true;
690}
691
692bool Reader::decodeUnicodeEscapeSequence(Token& token,
693 Location& current,
694 Location end,
695 unsigned int& unicode) {
696 if (end - current < 4)
697 return addError(
698 "Bad unicode escape sequence in string: four digits expected.",
699 token,
700 current);
701 unicode = 0;
702 for (int index = 0; index < 4; ++index) {
703 Char c = *current++;
704 unicode *= 16;
705 if (c >= '0' && c <= '9')
706 unicode += c - '0';
707 else if (c >= 'a' && c <= 'f')
708 unicode += c - 'a' + 10;
709 else if (c >= 'A' && c <= 'F')
710 unicode += c - 'A' + 10;
711 else
712 return addError(
713 "Bad unicode escape sequence in string: hexadecimal digit expected.",
714 token,
715 current);
716 }
717 return true;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400718}
719
720bool
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500721Reader::addError(const std::string& message, Token& token, Location extra) {
722 ErrorInfo info;
723 info.token_ = token;
724 info.message_ = message;
725 info.extra_ = extra;
726 errors_.push_back(info);
727 return false;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400728}
729
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500730bool Reader::recoverFromError(TokenType skipUntilToken) {
731 int errorCount = int(errors_.size());
732 Token skip;
733 for (;;) {
734 if (!readToken(skip))
735 errors_.resize(errorCount); // discard errors caused by recovery
736 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
737 break;
738 }
739 errors_.resize(errorCount);
740 return false;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400741}
742
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500743bool Reader::addErrorAndRecover(const std::string& message,
744 Token& token,
745 TokenType skipUntilToken) {
746 addError(message, token);
747 return recoverFromError(skipUntilToken);
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400748}
749
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500750Value& Reader::currentValue() { return *(nodes_.top()); }
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400751
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500752Reader::Char Reader::getNextChar() {
753 if (current_ == end_)
754 return 0;
755 return *current_++;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400756}
757
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500758void Reader::getLocationLineAndColumn(Location location,
759 int& line,
760 int& column) const {
761 Location current = begin_;
762 Location lastLineStart = current;
763 line = 0;
764 while (current < location && current != end_) {
765 Char c = *current++;
766 if (c == '\r') {
767 if (*current == '\n')
768 ++current;
769 lastLineStart = current;
770 ++line;
771 } else if (c == '\n') {
772 lastLineStart = current;
773 ++line;
774 }
775 }
776 // column & line start at 1
777 column = int(location - lastLineStart) + 1;
778 ++line;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400779}
780
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500781std::string Reader::getLocationLineAndColumn(Location location) const {
782 int line, column;
783 getLocationLineAndColumn(location, line, column);
784 char buffer[18 + 16 + 16 + 1];
785#if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
786#if defined(WINCE)
787 _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
788#else
789 sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
790#endif
791#else
792 snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
793#endif
794 return buffer;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400795}
796
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400797// Deprecated. Preserved for backward compatibility
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500798std::string Reader::getFormatedErrorMessages() const {
799 return getFormattedErrorMessages();
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400800}
801
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500802std::string Reader::getFormattedErrorMessages() const {
803 std::string formattedMessage;
804 for (Errors::const_iterator itError = errors_.begin();
805 itError != errors_.end();
806 ++itError) {
807 const ErrorInfo& error = *itError;
808 formattedMessage +=
809 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
810 formattedMessage += " " + error.message_ + "\n";
811 if (error.extra_)
812 formattedMessage +=
813 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
814 }
815 return formattedMessage;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400816}
817
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500818std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
819 std::vector<Reader::StructuredError> allErrors;
820 for (Errors::const_iterator itError = errors_.begin();
821 itError != errors_.end();
822 ++itError) {
823 const ErrorInfo& error = *itError;
824 Reader::StructuredError structured;
825 structured.offset_start = error.token_.start_ - begin_;
826 structured.offset_limit = error.token_.end_ - begin_;
827 structured.message = error.message_;
828 allErrors.push_back(structured);
829 }
830 return allErrors;
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400831}
832
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -0500833bool Reader::pushError(const Value& value, const std::string& message) {
834 size_t length = end_ - begin_;
835 if(value.getOffsetStart() > length
836 || value.getOffsetLimit() > length)
837 return false;
838 Token token;
839 token.type_ = tokenError;
840 token.start_ = begin_ + value.getOffsetStart();
841 token.end_ = end_ + value.getOffsetLimit();
842 ErrorInfo info;
843 info.token_ = token;
844 info.message_ = message;
845 info.extra_ = 0;
846 errors_.push_back(info);
847 return true;
848}
849
850bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) {
851 size_t length = end_ - begin_;
852 if(value.getOffsetStart() > length
853 || value.getOffsetLimit() > length
854 || extra.getOffsetLimit() > length)
855 return false;
856 Token token;
857 token.type_ = tokenError;
858 token.start_ = begin_ + value.getOffsetStart();
859 token.end_ = begin_ + value.getOffsetLimit();
860 ErrorInfo info;
861 info.token_ = token;
862 info.message_ = message;
863 info.extra_ = begin_ + extra.getOffsetStart();
864 errors_.push_back(info);
865 return true;
866}
867
868bool Reader::good() const {
869 return !errors_.size();
870}
871
872std::istream& operator>>(std::istream& sin, Value& root) {
873 Json::Reader reader;
874 bool ok = reader.parse(sin, root, true);
875 if (!ok) {
876 fprintf(stderr,
877 "Error from reader: %s",
878 reader.getFormattedErrorMessages().c_str());
879
880 JSON_FAIL_MESSAGE("reader error");
881 }
882 return sin;
883}
Leon Scroggins IIIf59fb0e2014-05-28 15:19:42 -0400884
885} // namespace Json