blob: 50f63f8db3046998eb9a896fd7573317e4655c19 [file] [log] [blame]
Jesse Wilson76d7e202010-08-03 17:55:09 -07001/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.util;
18
kopriva2cb96ab2018-10-03 14:10:27 -070019import libcore.internal.StringPool;
20
Jesse Wilson847cf342011-04-21 11:28:31 -070021import java.io.Closeable;
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -080022import java.io.EOFException;
Jesse Wilson76d7e202010-08-03 17:55:09 -070023import java.io.IOException;
24import java.io.Reader;
Jesse Wilson76d7e202010-08-03 17:55:09 -070025import java.util.ArrayList;
26import java.util.List;
kopriva2cb96ab2018-10-03 14:10:27 -070027
Jesse Wilson76d7e202010-08-03 17:55:09 -070028
29/**
30 * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
31 * encoded value as a stream of tokens. This stream includes both literal
32 * values (strings, numbers, booleans, and nulls) as well as the begin and
33 * end delimiters of objects and arrays. The tokens are traversed in
34 * depth-first order, the same order that they appear in the JSON document.
35 * Within JSON objects, name/value pairs are represented by a single token.
36 *
37 * <h3>Parsing JSON</h3>
Jesse Wilson3312b292010-10-15 17:33:54 -070038 * To create a recursive descent parser for your own JSON streams, first create
39 * an entry point method that creates a {@code JsonReader}.
Jesse Wilson76d7e202010-08-03 17:55:09 -070040 *
41 * <p>Next, create handler methods for each structure in your JSON text. You'll
42 * need a method for each object type and for each array type.
43 * <ul>
44 * <li>Within <strong>array handling</strong> methods, first call {@link
45 * #beginArray} to consume the array's opening bracket. Then create a
46 * while loop that accumulates values, terminating when {@link #hasNext}
47 * is false. Finally, read the array's closing bracket by calling {@link
48 * #endArray}.
49 * <li>Within <strong>object handling</strong> methods, first call {@link
50 * #beginObject} to consume the object's opening brace. Then create a
51 * while loop that assigns values to local variables based on their name.
52 * This loop should terminate when {@link #hasNext} is false. Finally,
53 * read the object's closing brace by calling {@link #endObject}.
54 * </ul>
55 * <p>When a nested object or array is encountered, delegate to the
56 * corresponding handler method.
57 *
58 * <p>When an unknown name is encountered, strict parsers should fail with an
59 * exception. Lenient parsers should call {@link #skipValue()} to recursively
60 * skip the value's nested tokens, which may otherwise conflict.
61 *
62 * <p>If a value may be null, you should first check using {@link #peek()}.
63 * Null literals can be consumed using either {@link #nextNull()} or {@link
64 * #skipValue()}.
65 *
66 * <h3>Example</h3>
67 * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
68 * [
69 * {
70 * "id": 912345678901,
71 * "text": "How do I read JSON on Android?",
72 * "geo": null,
73 * "user": {
74 * "name": "android_newb",
75 * "followers_count": 41
76 * }
77 * },
78 * {
79 * "id": 912345678902,
80 * "text": "@android_newb just use android.util.JsonReader!",
81 * "geo": [50.454722, -104.606667],
82 * "user": {
83 * "name": "jesse",
84 * "followers_count": 2
85 * }
86 * }
87 * ]}</pre>
88 * This code implements the parser for the above structure: <pre> {@code
89 *
90 * public List<Message> readJsonStream(InputStream in) throws IOException {
91 * JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
Jesse Wilson9d30ea02011-03-16 15:02:48 -070092 * try {
93 * return readMessagesArray(reader);
94 * } finally {
95 * reader.close();
96 * }
Jesse Wilson76d7e202010-08-03 17:55:09 -070097 * }
98 *
99 * public List<Message> readMessagesArray(JsonReader reader) throws IOException {
100 * List<Message> messages = new ArrayList<Message>();
101 *
102 * reader.beginArray();
103 * while (reader.hasNext()) {
104 * messages.add(readMessage(reader));
105 * }
106 * reader.endArray();
107 * return messages;
108 * }
109 *
110 * public Message readMessage(JsonReader reader) throws IOException {
111 * long id = -1;
112 * String text = null;
113 * User user = null;
114 * List<Double> geo = null;
115 *
116 * reader.beginObject();
117 * while (reader.hasNext()) {
118 * String name = reader.nextName();
119 * if (name.equals("id")) {
120 * id = reader.nextLong();
121 * } else if (name.equals("text")) {
122 * text = reader.nextString();
123 * } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
124 * geo = readDoublesArray(reader);
125 * } else if (name.equals("user")) {
126 * user = readUser(reader);
127 * } else {
128 * reader.skipValue();
129 * }
130 * }
131 * reader.endObject();
132 * return new Message(id, text, user, geo);
133 * }
134 *
135 * public List<Double> readDoublesArray(JsonReader reader) throws IOException {
136 * List<Double> doubles = new ArrayList<Double>();
137 *
138 * reader.beginArray();
139 * while (reader.hasNext()) {
140 * doubles.add(reader.nextDouble());
141 * }
142 * reader.endArray();
143 * return doubles;
144 * }
145 *
146 * public User readUser(JsonReader reader) throws IOException {
147 * String username = null;
148 * int followersCount = -1;
149 *
150 * reader.beginObject();
151 * while (reader.hasNext()) {
152 * String name = reader.nextName();
153 * if (name.equals("name")) {
154 * username = reader.nextString();
155 * } else if (name.equals("followers_count")) {
156 * followersCount = reader.nextInt();
157 * } else {
158 * reader.skipValue();
159 * }
160 * }
161 * reader.endObject();
162 * return new User(username, followersCount);
163 * }}</pre>
164 *
165 * <h3>Number Handling</h3>
166 * This reader permits numeric values to be read as strings and string values to
167 * be read as numbers. For example, both elements of the JSON array {@code
168 * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
169 * This behavior is intended to prevent lossy numeric conversions: double is
170 * JavaScript's only numeric type and very large values like {@code
171 * 9007199254740993} cannot be represented exactly on that platform. To minimize
172 * precision loss, extremely large values should be written and read as strings
173 * in JSON.
174 *
175 * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
176 * of this class are not thread safe.
177 */
178public final class JsonReader implements Closeable {
179
/** Text of the {@code true} literal; {@link #nextBoolean} compares against it by identity. */
private static final String TRUE = "true";
/** Text of the {@code false} literal. */
private static final String FALSE = "false";

/** Supplies {@code String} instances for character ranges taken from {@link #buffer}. */
private final StringPool stringPool = new StringPool();

/** The input JSON. */
private final Reader in;

/** True to accept non-spec compliant JSON. Strict by default. */
private boolean lenient;

/**
 * Use a manual buffer to easily read and unread upcoming characters, and
 * also so we can create strings without an intermediate StringBuilder.
 * We decode literals directly out of this buffer, so it must be at least as
 * long as the longest token that can be reported as a number.
 */
private final char[] buffer = new char[1024];
/** Index in {@link #buffer} of the next character to read. */
private int pos;
/** Index in {@link #buffer} one past the last loaded character. */
private int limit;

/*
 * The line and column (both 1-based) of the first character in the buffer.
 */
private int bufferStartLine = 1;
private int bufferStartColumn = 1;

/** The scopes currently open, innermost last. Seeded with the empty-document scope. */
private final List<JsonScope> stack = new ArrayList<JsonScope>();
{
    push(JsonScope.EMPTY_DOCUMENT);
}

/**
 * The type of the next token to be returned by {@link #peek} and {@link
 * #advance}. If null, peek() will assign a value.
 */
private JsonToken token;

/** The text of the next name. */
private String name;

/*
 * For the next literal value, we may have the text value, or the position
 * and length in the buffer.
 */
private String value;
private int valuePos;
private int valueLength;

/** True if we're currently handling a skipValue() call. */
private boolean skipping;
231
/**
 * Creates a new instance that reads a JSON-encoded stream from {@code in}.
 *
 * @param in the character source to read JSON from; must not be null.
 * @throws NullPointerException if {@code in} is null.
 */
public JsonReader(Reader in) {
    if (null == in) {
        throw new NullPointerException("in == null");
    }

    this.in = in;
}
241
242 /**
Jesse Wilson1ba41712010-08-06 16:08:59 -0700243 * Configure this parser to be be liberal in what it accepts. By default,
244 * this parser is strict and only accepts JSON as specified by <a
245 * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the
246 * parser to lenient causes it to ignore the following syntax errors:
247 *
248 * <ul>
249 * <li>End of line comments starting with {@code //} or {@code #} and
250 * ending with a newline character.
251 * <li>C-style comments starting with {@code /*} and ending with
252 * {@code *}{@code /}. Such comments may not be nested.
253 * <li>Names that are unquoted or {@code 'single quoted'}.
254 * <li>Strings that are unquoted or {@code 'single quoted'}.
255 * <li>Array elements separated by {@code ;} instead of {@code ,}.
256 * <li>Unnecessary array separators. These are interpreted as if null
257 * was the omitted value.
258 * <li>Names and values separated by {@code =} or {@code =>} instead of
259 * {@code :}.
260 * <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
261 * </ul>
262 */
263 public void setLenient(boolean lenient) {
264 this.lenient = lenient;
265 }
266
267 /**
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800268 * Returns true if this parser is liberal in what it accepts.
269 */
270 public boolean isLenient() {
271 return lenient;
272 }
273
274 /**
Jesse Wilson76d7e202010-08-03 17:55:09 -0700275 * Consumes the next token from the JSON stream and asserts that it is the
276 * beginning of a new array.
277 */
278 public void beginArray() throws IOException {
279 expect(JsonToken.BEGIN_ARRAY);
280 }
281
282 /**
283 * Consumes the next token from the JSON stream and asserts that it is the
284 * end of the current array.
285 */
286 public void endArray() throws IOException {
287 expect(JsonToken.END_ARRAY);
288 }
289
290 /**
291 * Consumes the next token from the JSON stream and asserts that it is the
292 * beginning of a new object.
293 */
294 public void beginObject() throws IOException {
295 expect(JsonToken.BEGIN_OBJECT);
296 }
297
298 /**
299 * Consumes the next token from the JSON stream and asserts that it is the
kopriva2cb96ab2018-10-03 14:10:27 -0700300 * end of the current object.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700301 */
302 public void endObject() throws IOException {
303 expect(JsonToken.END_OBJECT);
304 }
305
306 /**
Jesse Wilson1ba41712010-08-06 16:08:59 -0700307 * Consumes {@code expected}.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700308 */
Jesse Wilson1ba41712010-08-06 16:08:59 -0700309 private void expect(JsonToken expected) throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800310 peek();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700311 if (token != expected) {
312 throw new IllegalStateException("Expected " + expected + " but was " + peek());
Jesse Wilson76d7e202010-08-03 17:55:09 -0700313 }
314 advance();
315 }
316
317 /**
318 * Returns true if the current array or object has another element.
319 */
320 public boolean hasNext() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800321 peek();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700322 return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700323 }
324
325 /**
326 * Returns the type of the next token without consuming it.
327 */
328 public JsonToken peek() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800329 if (token != null) {
330 return token;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700331 }
332
333 switch (peekStack()) {
334 case EMPTY_DOCUMENT:
335 replaceTop(JsonScope.NONEMPTY_DOCUMENT);
336 JsonToken firstToken = nextValue();
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800337 if (!lenient && token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700338 throw new IOException(
339 "Expected JSON document to start with '[' or '{' but was " + token);
340 }
341 return firstToken;
342 case EMPTY_ARRAY:
343 return nextInArray(true);
344 case NONEMPTY_ARRAY:
345 return nextInArray(false);
346 case EMPTY_OBJECT:
347 return nextInObject(true);
348 case DANGLING_NAME:
349 return objectValue();
350 case NONEMPTY_OBJECT:
351 return nextInObject(false);
352 case NONEMPTY_DOCUMENT:
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800353 try {
354 JsonToken token = nextValue();
355 if (lenient) {
356 return token;
357 }
358 throw syntaxError("Expected EOF");
359 } catch (EOFException e) {
360 return token = JsonToken.END_DOCUMENT; // TODO: avoid throwing here?
361 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700362 case CLOSED:
363 throw new IllegalStateException("JsonReader is closed");
364 default:
365 throw new AssertionError();
366 }
367 }
368
369 /**
370 * Advances the cursor in the JSON stream to the next token.
371 */
372 private JsonToken advance() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800373 peek();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700374
375 JsonToken result = token;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700376 token = null;
377 value = null;
378 name = null;
379 return result;
380 }
381
382 /**
383 * Returns the next token, a {@link JsonToken#NAME property name}, and
384 * consumes it.
385 *
386 * @throws IOException if the next token in the stream is not a property
387 * name.
388 */
389 public String nextName() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800390 peek();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700391 if (token != JsonToken.NAME) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700392 throw new IllegalStateException("Expected a name but was " + peek());
393 }
394 String result = name;
395 advance();
396 return result;
397 }
398
399 /**
400 * Returns the {@link JsonToken#STRING string} value of the next token,
401 * consuming it. If the next token is a number, this method will return its
402 * string form.
403 *
404 * @throws IllegalStateException if the next token is not a string or if
405 * this reader is closed.
406 */
407 public String nextString() throws IOException {
Jesse Wilson1ba41712010-08-06 16:08:59 -0700408 peek();
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800409 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700410 throw new IllegalStateException("Expected a string but was " + peek());
411 }
412
413 String result = value;
414 advance();
415 return result;
416 }
417
418 /**
419 * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token,
420 * consuming it.
421 *
422 * @throws IllegalStateException if the next token is not a boolean or if
423 * this reader is closed.
424 */
425 public boolean nextBoolean() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800426 peek();
427 if (token != JsonToken.BOOLEAN) {
428 throw new IllegalStateException("Expected a boolean but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700429 }
430
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800431 boolean result = (value == TRUE);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700432 advance();
433 return result;
434 }
435
436 /**
437 * Consumes the next token from the JSON stream and asserts that it is a
438 * literal null.
439 *
440 * @throws IllegalStateException if the next token is not null or if this
441 * reader is closed.
442 */
443 public void nextNull() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800444 peek();
445 if (token != JsonToken.NULL) {
446 throw new IllegalStateException("Expected null but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700447 }
448
449 advance();
450 }
451
452 /**
453 * Returns the {@link JsonToken#NUMBER double} value of the next token,
454 * consuming it. If the next token is a string, this method will attempt to
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800455 * parse it as a double using {@link Double#parseDouble(String)}.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700456 *
457 * @throws IllegalStateException if the next token is not a literal value.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700458 */
459 public double nextDouble() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800460 peek();
461 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
462 throw new IllegalStateException("Expected a double but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700463 }
464
465 double result = Double.parseDouble(value);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700466 advance();
467 return result;
468 }
469
470 /**
471 * Returns the {@link JsonToken#NUMBER long} value of the next token,
472 * consuming it. If the next token is a string, this method will attempt to
473 * parse it as a long. If the next token's numeric value cannot be exactly
474 * represented by a Java {@code long}, this method throws.
475 *
476 * @throws IllegalStateException if the next token is not a literal value.
477 * @throws NumberFormatException if the next literal value cannot be parsed
478 * as a number, or exactly represented as a long.
479 */
480 public long nextLong() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800481 peek();
482 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
483 throw new IllegalStateException("Expected a long but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700484 }
485
486 long result;
487 try {
488 result = Long.parseLong(value);
489 } catch (NumberFormatException ignored) {
490 double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
491 result = (long) asDouble;
492 if ((double) result != asDouble) {
493 throw new NumberFormatException(value);
494 }
495 }
496
Jesse Wilson76d7e202010-08-03 17:55:09 -0700497 advance();
498 return result;
499 }
500
501 /**
502 * Returns the {@link JsonToken#NUMBER int} value of the next token,
503 * consuming it. If the next token is a string, this method will attempt to
504 * parse it as an int. If the next token's numeric value cannot be exactly
505 * represented by a Java {@code int}, this method throws.
506 *
507 * @throws IllegalStateException if the next token is not a literal value.
508 * @throws NumberFormatException if the next literal value cannot be parsed
509 * as a number, or exactly represented as an int.
510 */
511 public int nextInt() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800512 peek();
513 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
514 throw new IllegalStateException("Expected an int but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700515 }
516
517 int result;
518 try {
519 result = Integer.parseInt(value);
520 } catch (NumberFormatException ignored) {
521 double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
522 result = (int) asDouble;
523 if ((double) result != asDouble) {
524 throw new NumberFormatException(value);
525 }
526 }
527
Jesse Wilson76d7e202010-08-03 17:55:09 -0700528 advance();
529 return result;
530 }
531
532 /**
533 * Closes this JSON reader and the underlying {@link Reader}.
534 */
535 public void close() throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700536 value = null;
537 token = null;
538 stack.clear();
539 stack.add(JsonScope.CLOSED);
540 in.close();
541 }
542
543 /**
544 * Skips the next value recursively. If it is an object or array, all nested
545 * elements are skipped. This method is intended for use when the JSON token
546 * stream contains unrecognized or unhandled values.
547 */
548 public void skipValue() throws IOException {
Jesse Wilsond07fb882010-08-06 19:30:04 -0700549 skipping = true;
550 try {
Calin Juravle8fbcc6b2014-02-18 19:08:39 +0000551 if (!hasNext() || peek() == JsonToken.END_DOCUMENT) {
552 throw new IllegalStateException("No element left to skip");
553 }
Jesse Wilsond07fb882010-08-06 19:30:04 -0700554 int count = 0;
555 do {
556 JsonToken token = advance();
557 if (token == JsonToken.BEGIN_ARRAY || token == JsonToken.BEGIN_OBJECT) {
558 count++;
559 } else if (token == JsonToken.END_ARRAY || token == JsonToken.END_OBJECT) {
560 count--;
561 }
562 } while (count != 0);
563 } finally {
564 skipping = false;
565 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700566 }
567
568 private JsonScope peekStack() {
569 return stack.get(stack.size() - 1);
570 }
571
572 private JsonScope pop() {
573 return stack.remove(stack.size() - 1);
574 }
575
576 private void push(JsonScope newTop) {
577 stack.add(newTop);
578 }
579
580 /**
581 * Replace the value on the top of the stack with the given value.
582 */
583 private void replaceTop(JsonScope newTop) {
584 stack.set(stack.size() - 1, newTop);
585 }
586
587 private JsonToken nextInArray(boolean firstElement) throws IOException {
588 if (firstElement) {
Jesse Wilson1ba41712010-08-06 16:08:59 -0700589 replaceTop(JsonScope.NONEMPTY_ARRAY);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700590 } else {
Jesse Wilson1ba41712010-08-06 16:08:59 -0700591 /* Look for a comma before each element after the first element. */
Jesse Wilson76d7e202010-08-03 17:55:09 -0700592 switch (nextNonWhitespace()) {
593 case ']':
594 pop();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700595 return token = JsonToken.END_ARRAY;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700596 case ';':
Jesse Wilson1ba41712010-08-06 16:08:59 -0700597 checkLenient(); // fall-through
598 case ',':
Jesse Wilson76d7e202010-08-03 17:55:09 -0700599 break;
600 default:
601 throw syntaxError("Unterminated array");
602 }
603 }
604
Jesse Wilson1ba41712010-08-06 16:08:59 -0700605 switch (nextNonWhitespace()) {
606 case ']':
607 if (firstElement) {
608 pop();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700609 return token = JsonToken.END_ARRAY;
610 }
611 // fall-through to handle ",]"
612 case ';':
613 case ',':
614 /* In lenient mode, a 0-length literal means 'null' */
615 checkLenient();
616 pos--;
Jesse Wilson1ba41712010-08-06 16:08:59 -0700617 value = "null";
618 return token = JsonToken.NULL;
619 default:
620 pos--;
621 return nextValue();
622 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700623 }
624
625 private JsonToken nextInObject(boolean firstElement) throws IOException {
626 /*
627 * Read delimiters. Either a comma/semicolon separating this and the
628 * previous name-value pair, or a close brace to denote the end of the
629 * object.
630 */
631 if (firstElement) {
632 /* Peek to see if this is the empty object. */
633 switch (nextNonWhitespace()) {
634 case '}':
635 pop();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700636 return token = JsonToken.END_OBJECT;
637 default:
638 pos--;
639 }
640 } else {
641 switch (nextNonWhitespace()) {
642 case '}':
643 pop();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700644 return token = JsonToken.END_OBJECT;
645 case ';':
646 case ',':
647 break;
648 default:
649 throw syntaxError("Unterminated object");
650 }
651 }
652
653 /* Read the name. */
654 int quote = nextNonWhitespace();
655 switch (quote) {
656 case '\'':
Jesse Wilson1ba41712010-08-06 16:08:59 -0700657 checkLenient(); // fall-through
Jesse Wilson76d7e202010-08-03 17:55:09 -0700658 case '"':
659 name = nextString((char) quote);
660 break;
661 default:
Jesse Wilson1ba41712010-08-06 16:08:59 -0700662 checkLenient();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700663 pos--;
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800664 name = nextLiteral(false);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700665 if (name.isEmpty()) {
666 throw syntaxError("Expected name");
667 }
668 }
669
670 replaceTop(JsonScope.DANGLING_NAME);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700671 return token = JsonToken.NAME;
672 }
673
674 private JsonToken objectValue() throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700675 /*
Jesse Wilson1ba41712010-08-06 16:08:59 -0700676 * Read the name/value separator. Usually a colon ':'. In lenient mode
677 * we also accept an equals sign '=', or an arrow "=>".
Jesse Wilson76d7e202010-08-03 17:55:09 -0700678 */
Jesse Wilson1ba41712010-08-06 16:08:59 -0700679 switch (nextNonWhitespace()) {
680 case ':':
681 break;
682 case '=':
683 checkLenient();
684 if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') {
685 pos++;
686 }
687 break;
688 default:
689 throw syntaxError("Expected ':'");
Jesse Wilson76d7e202010-08-03 17:55:09 -0700690 }
691
692 replaceTop(JsonScope.NONEMPTY_OBJECT);
693 return nextValue();
694 }
695
696 private JsonToken nextValue() throws IOException {
697 int c = nextNonWhitespace();
698 switch (c) {
699 case '{':
700 push(JsonScope.EMPTY_OBJECT);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700701 return token = JsonToken.BEGIN_OBJECT;
702
703 case '[':
704 push(JsonScope.EMPTY_ARRAY);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700705 return token = JsonToken.BEGIN_ARRAY;
706
707 case '\'':
Jesse Wilson1ba41712010-08-06 16:08:59 -0700708 checkLenient(); // fall-through
Jesse Wilson76d7e202010-08-03 17:55:09 -0700709 case '"':
710 value = nextString((char) c);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700711 return token = JsonToken.STRING;
712
713 default:
714 pos--;
715 return readLiteral();
716 }
717 }
718
719 /**
720 * Returns true once {@code limit - pos >= minimum}. If the data is
721 * exhausted before that many characters are available, this returns
722 * false.
723 */
724 private boolean fillBuffer(int minimum) throws IOException {
Jesse Wilsonfebae4e2011-07-18 12:58:03 -0700725 // Before clobbering the old characters, update where buffer starts
726 for (int i = 0; i < pos; i++) {
727 if (buffer[i] == '\n') {
728 bufferStartLine++;
729 bufferStartColumn = 1;
730 } else {
731 bufferStartColumn++;
732 }
733 }
734
Jesse Wilson76d7e202010-08-03 17:55:09 -0700735 if (limit != pos) {
736 limit -= pos;
737 System.arraycopy(buffer, pos, buffer, 0, limit);
738 } else {
739 limit = 0;
740 }
741
742 pos = 0;
743 int total;
744 while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
745 limit += total;
Jesse Wilson7a2c8132011-07-20 12:23:23 -0700746
747 // if this is the first read, consume an optional byte order mark (BOM) if it exists
Jesse Wilsond1ad3c22011-07-22 09:01:48 -0700748 if (bufferStartLine == 1 && bufferStartColumn == 1
749 && limit > 0 && buffer[0] == '\ufeff') {
Jesse Wilson7a2c8132011-07-20 12:23:23 -0700750 pos++;
751 bufferStartColumn--;
752 }
753
Jesse Wilson76d7e202010-08-03 17:55:09 -0700754 if (limit >= minimum) {
755 return true;
756 }
757 }
758 return false;
759 }
760
Jesse Wilsonfebae4e2011-07-18 12:58:03 -0700761 private int getLineNumber() {
762 int result = bufferStartLine;
763 for (int i = 0; i < pos; i++) {
764 if (buffer[i] == '\n') {
765 result++;
766 }
767 }
768 return result;
769 }
770
771 private int getColumnNumber() {
772 int result = bufferStartColumn;
773 for (int i = 0; i < pos; i++) {
774 if (buffer[i] == '\n') {
775 result = 1;
776 } else {
777 result++;
778 }
779 }
780 return result;
781 }
782
Jesse Wilson76d7e202010-08-03 17:55:09 -0700783 private int nextNonWhitespace() throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700784 while (pos < limit || fillBuffer(1)) {
785 int c = buffer[pos++];
786 switch (c) {
787 case '\t':
788 case ' ':
789 case '\n':
790 case '\r':
791 continue;
792
793 case '/':
794 if (pos == limit && !fillBuffer(1)) {
795 return c;
796 }
797
Jesse Wilson1ba41712010-08-06 16:08:59 -0700798 checkLenient();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700799 char peek = buffer[pos];
800 switch (peek) {
801 case '*':
802 // skip a /* c-style comment */
803 pos++;
804 if (!skipTo("*/")) {
805 throw syntaxError("Unterminated comment");
806 }
807 pos += 2;
808 continue;
809
810 case '/':
811 // skip a // end-of-line comment
812 pos++;
813 skipToEndOfLine();
814 continue;
815
816 default:
817 return c;
818 }
819
820 case '#':
821 /*
822 * Skip a # hash end-of-line comment. The JSON RFC doesn't
823 * specify this behaviour, but it's required to parse
824 * existing documents. See http://b/2571423.
825 */
Jesse Wilson1ba41712010-08-06 16:08:59 -0700826 checkLenient();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700827 skipToEndOfLine();
828 continue;
829
830 default:
831 return c;
832 }
833 }
834
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800835 throw new EOFException("End of input");
Jesse Wilson76d7e202010-08-03 17:55:09 -0700836 }
837
Jesse Wilson1ba41712010-08-06 16:08:59 -0700838 private void checkLenient() throws IOException {
839 if (!lenient) {
840 throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
841 }
842 }
843
Jesse Wilson76d7e202010-08-03 17:55:09 -0700844 /**
845 * Advances the position until after the next newline character. If the line
846 * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
847 * caller.
848 */
849 private void skipToEndOfLine() throws IOException {
850 while (pos < limit || fillBuffer(1)) {
851 char c = buffer[pos++];
852 if (c == '\r' || c == '\n') {
853 break;
854 }
855 }
856 }
857
858 private boolean skipTo(String toFind) throws IOException {
859 outer:
Jesse Wilsond1ad3c22011-07-22 09:01:48 -0700860 for (; pos + toFind.length() <= limit || fillBuffer(toFind.length()); pos++) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700861 for (int c = 0; c < toFind.length(); c++) {
862 if (buffer[pos + c] != toFind.charAt(c)) {
863 continue outer;
864 }
865 }
866 return true;
867 }
868 return false;
869 }
870
871 /**
872 * Returns the string up to but not including {@code quote}, unescaping any
873 * character escape sequences encountered along the way. The opening quote
874 * should have already been read. This consumes the closing quote, but does
875 * not include it in the returned string.
876 *
877 * @param quote either ' or ".
878 * @throws NumberFormatException if any unicode escape sequences are
879 * malformed.
880 */
881 private String nextString(char quote) throws IOException {
882 StringBuilder builder = null;
883 do {
884 /* the index of the first character not yet appended to the builder. */
885 int start = pos;
886 while (pos < limit) {
887 int c = buffer[pos++];
888
889 if (c == quote) {
Jesse Wilsond07fb882010-08-06 19:30:04 -0700890 if (skipping) {
891 return "skipped!";
892 } else if (builder == null) {
Jesse Wilson847cf342011-04-21 11:28:31 -0700893 return stringPool.get(buffer, start, pos - start - 1);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700894 } else {
895 builder.append(buffer, start, pos - start - 1);
896 return builder.toString();
897 }
898
899 } else if (c == '\\') {
900 if (builder == null) {
901 builder = new StringBuilder();
902 }
903 builder.append(buffer, start, pos - start - 1);
904 builder.append(readEscapeCharacter());
905 start = pos;
906 }
907 }
908
909 if (builder == null) {
910 builder = new StringBuilder();
911 }
912 builder.append(buffer, start, pos - start);
913 } while (fillBuffer(1));
914
915 throw syntaxError("Unterminated string");
916 }
917
918 /**
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800919 * Reads the value up to but not including any delimiter characters. This
Jesse Wilson76d7e202010-08-03 17:55:09 -0700920 * does not consume the delimiter character.
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800921 *
922 * @param assignOffsetsOnly true for this method to only set the valuePos
923 * and valueLength fields and return a null result. This only works if
924 * the literal is short; a string is returned otherwise.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700925 */
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800926 private String nextLiteral(boolean assignOffsetsOnly) throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700927 StringBuilder builder = null;
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800928 valuePos = -1;
929 valueLength = 0;
930 int i = 0;
Jesse Wilson1ba41712010-08-06 16:08:59 -0700931
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800932 findNonLiteralCharacter:
933 while (true) {
934 for (; pos + i < limit; i++) {
935 switch (buffer[pos + i]) {
936 case '/':
937 case '\\':
938 case ';':
939 case '#':
940 case '=':
941 checkLenient(); // fall-through
942 case '{':
943 case '}':
944 case '[':
945 case ']':
946 case ':':
947 case ',':
948 case ' ':
949 case '\t':
950 case '\f':
951 case '\r':
952 case '\n':
953 break findNonLiteralCharacter;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700954 }
955 }
956
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800957 /*
958 * Attempt to load the entire literal into the buffer at once. If
959 * we run out of input, add a non-literal character at the end so
960 * that decoding doesn't need to do bounds checks.
961 */
962 if (i < buffer.length) {
963 if (fillBuffer(i + 1)) {
964 continue;
965 } else {
966 buffer[limit] = '\0';
967 break;
968 }
969 }
970
971 // use a StringBuilder when the value is too long. It must be an unquoted string.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700972 if (builder == null) {
973 builder = new StringBuilder();
974 }
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800975 builder.append(buffer, pos, i);
976 valueLength += i;
977 pos += i;
978 i = 0;
979 if (!fillBuffer(1)) {
980 break;
981 }
982 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700983
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800984 String result;
985 if (assignOffsetsOnly && builder == null) {
986 valuePos = pos;
987 result = null;
988 } else if (skipping) {
989 result = "skipped!";
990 } else if (builder == null) {
Jesse Wilson847cf342011-04-21 11:28:31 -0700991 result = stringPool.get(buffer, pos, i);
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800992 } else {
993 builder.append(buffer, pos, i);
994 result = builder.toString();
995 }
996 valueLength += i;
997 pos += i;
998 return result;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700999 }
1000
1001 @Override public String toString() {
1002 return getClass().getSimpleName() + " near " + getSnippet();
1003 }
1004
1005 /**
1006 * Unescapes the character identified by the character or characters that
1007 * immediately follow a backslash. The backslash '\' should have already
1008 * been read. This supports both unicode escapes "u000A" and two-character
1009 * escapes "\n".
1010 *
1011 * @throws NumberFormatException if any unicode escape sequences are
1012 * malformed.
1013 */
1014 private char readEscapeCharacter() throws IOException {
1015 if (pos == limit && !fillBuffer(1)) {
1016 throw syntaxError("Unterminated escape sequence");
1017 }
1018
1019 char escaped = buffer[pos++];
1020 switch (escaped) {
1021 case 'u':
1022 if (pos + 4 > limit && !fillBuffer(4)) {
1023 throw syntaxError("Unterminated escape sequence");
1024 }
Jesse Wilson847cf342011-04-21 11:28:31 -07001025 String hex = stringPool.get(buffer, pos, 4);
Jesse Wilson76d7e202010-08-03 17:55:09 -07001026 pos += 4;
1027 return (char) Integer.parseInt(hex, 16);
1028
1029 case 't':
1030 return '\t';
1031
1032 case 'b':
1033 return '\b';
1034
1035 case 'n':
1036 return '\n';
1037
1038 case 'r':
1039 return '\r';
1040
1041 case 'f':
1042 return '\f';
1043
1044 case '\'':
1045 case '"':
1046 case '\\':
1047 default:
1048 return escaped;
1049 }
1050 }
1051
1052 /**
1053 * Reads a null, boolean, numeric or unquoted string literal value.
1054 */
1055 private JsonToken readLiteral() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001056 value = nextLiteral(true);
1057 if (valueLength == 0) {
Jesse Wilson76d7e202010-08-03 17:55:09 -07001058 throw syntaxError("Expected literal value");
1059 }
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001060 token = decodeLiteral();
1061 if (token == JsonToken.STRING) {
1062 checkLenient();
1063 }
1064 return token;
Jesse Wilson76d7e202010-08-03 17:55:09 -07001065 }
1066
1067 /**
1068 * Assigns {@code nextToken} based on the value of {@code nextValue}.
1069 */
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001070 private JsonToken decodeLiteral() throws IOException {
1071 if (valuePos == -1) {
1072 // it was too long to fit in the buffer so it can only be a string
1073 return JsonToken.STRING;
1074 } else if (valueLength == 4
1075 && ('n' == buffer[valuePos ] || 'N' == buffer[valuePos ])
1076 && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1])
1077 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1078 && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) {
1079 value = "null";
1080 return JsonToken.NULL;
1081 } else if (valueLength == 4
1082 && ('t' == buffer[valuePos ] || 'T' == buffer[valuePos ])
1083 && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1])
1084 && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2])
1085 && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) {
1086 value = TRUE;
1087 return JsonToken.BOOLEAN;
1088 } else if (valueLength == 5
1089 && ('f' == buffer[valuePos ] || 'F' == buffer[valuePos ])
1090 && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1])
1091 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1092 && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3])
1093 && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) {
1094 value = FALSE;
1095 return JsonToken.BOOLEAN;
Jesse Wilson76d7e202010-08-03 17:55:09 -07001096 } else {
Jesse Wilson847cf342011-04-21 11:28:31 -07001097 value = stringPool.get(buffer, valuePos, valueLength);
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001098 return decodeNumber(buffer, valuePos, valueLength);
1099 }
1100 }
1101
1102 /**
1103 * Determine whether the characters is a JSON number. Numbers are of the
1104 * form -12.34e+56. Fractional and exponential parts are optional. Leading
1105 * zeroes are not allowed in the value or exponential part, but are allowed
1106 * in the fraction.
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001107 */
1108 private JsonToken decodeNumber(char[] chars, int offset, int length) {
1109 int i = offset;
1110 int c = chars[i];
1111
1112 if (c == '-') {
1113 c = chars[++i];
1114 }
1115
1116 if (c == '0') {
1117 c = chars[++i];
1118 } else if (c >= '1' && c <= '9') {
1119 c = chars[++i];
1120 while (c >= '0' && c <= '9') {
1121 c = chars[++i];
Jesse Wilson76d7e202010-08-03 17:55:09 -07001122 }
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001123 } else {
1124 return JsonToken.STRING;
1125 }
1126
1127 if (c == '.') {
1128 c = chars[++i];
1129 while (c >= '0' && c <= '9') {
1130 c = chars[++i];
1131 }
1132 }
1133
1134 if (c == 'e' || c == 'E') {
1135 c = chars[++i];
1136 if (c == '+' || c == '-') {
1137 c = chars[++i];
1138 }
1139 if (c >= '0' && c <= '9') {
1140 c = chars[++i];
1141 while (c >= '0' && c <= '9') {
1142 c = chars[++i];
1143 }
1144 } else {
1145 return JsonToken.STRING;
1146 }
1147 }
1148
1149 if (i == offset + length) {
1150 return JsonToken.NUMBER;
1151 } else {
1152 return JsonToken.STRING;
Jesse Wilson76d7e202010-08-03 17:55:09 -07001153 }
1154 }
1155
1156 /**
1157 * Throws a new IO exception with the given message and a context snippet
1158 * with this reader's content.
1159 */
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -08001160 private IOException syntaxError(String message) throws IOException {
Jesse Wilsonfebae4e2011-07-18 12:58:03 -07001161 throw new MalformedJsonException(message
1162 + " at line " + getLineNumber() + " column " + getColumnNumber());
Jesse Wilson76d7e202010-08-03 17:55:09 -07001163 }
1164
1165 private CharSequence getSnippet() {
1166 StringBuilder snippet = new StringBuilder();
1167 int beforePos = Math.min(pos, 20);
1168 snippet.append(buffer, pos - beforePos, beforePos);
1169 int afterPos = Math.min(limit - pos, 20);
1170 snippet.append(buffer, pos, afterPos);
1171 return snippet;
1172 }
Jesse Wilson76d7e202010-08-03 17:55:09 -07001173}