blob: 8f44895bab01e632e8337859a154448f1b5599e4 [file] [log] [blame]
Jesse Wilson76d7e202010-08-03 17:55:09 -07001/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.util;
18
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -080019import java.io.EOFException;
Jesse Wilson76d7e202010-08-03 17:55:09 -070020import java.io.IOException;
21import java.io.Reader;
22import java.io.Closeable;
23import java.util.ArrayList;
24import java.util.List;
25
26/**
27 * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
28 * encoded value as a stream of tokens. This stream includes both literal
29 * values (strings, numbers, booleans, and nulls) as well as the begin and
30 * end delimiters of objects and arrays. The tokens are traversed in
31 * depth-first order, the same order that they appear in the JSON document.
32 * Within JSON objects, name/value pairs are represented by a single token.
33 *
34 * <h3>Parsing JSON</h3>
Jesse Wilson3312b292010-10-15 17:33:54 -070035 * To create a recursive descent parser for your own JSON streams, first create
36 * an entry point method that creates a {@code JsonReader}.
Jesse Wilson76d7e202010-08-03 17:55:09 -070037 *
38 * <p>Next, create handler methods for each structure in your JSON text. You'll
39 * need a method for each object type and for each array type.
40 * <ul>
41 * <li>Within <strong>array handling</strong> methods, first call {@link
42 * #beginArray} to consume the array's opening bracket. Then create a
43 * while loop that accumulates values, terminating when {@link #hasNext}
44 * is false. Finally, read the array's closing bracket by calling {@link
45 * #endArray}.
46 * <li>Within <strong>object handling</strong> methods, first call {@link
47 * #beginObject} to consume the object's opening brace. Then create a
48 * while loop that assigns values to local variables based on their name.
49 * This loop should terminate when {@link #hasNext} is false. Finally,
50 * read the object's closing brace by calling {@link #endObject}.
51 * </ul>
52 * <p>When a nested object or array is encountered, delegate to the
53 * corresponding handler method.
54 *
55 * <p>When an unknown name is encountered, strict parsers should fail with an
56 * exception. Lenient parsers should call {@link #skipValue()} to recursively
57 * skip the value's nested tokens, which may otherwise conflict.
58 *
59 * <p>If a value may be null, you should first check using {@link #peek()}.
60 * Null literals can be consumed using either {@link #nextNull()} or {@link
61 * #skipValue()}.
62 *
63 * <h3>Example</h3>
64 * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
65 * [
66 * {
67 * "id": 912345678901,
68 * "text": "How do I read JSON on Android?",
69 * "geo": null,
70 * "user": {
71 * "name": "android_newb",
72 * "followers_count": 41
73 * }
74 * },
75 * {
76 * "id": 912345678902,
77 * "text": "@android_newb just use android.util.JsonReader!",
78 * "geo": [50.454722, -104.606667],
79 * "user": {
80 * "name": "jesse",
81 * "followers_count": 2
82 * }
83 * }
84 * ]}</pre>
85 * This code implements the parser for the above structure: <pre> {@code
86 *
87 * public List<Message> readJsonStream(InputStream in) throws IOException {
88 * JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
89 * return readMessagesArray(reader);
90 * }
91 *
92 * public List<Message> readMessagesArray(JsonReader reader) throws IOException {
93 * List<Message> messages = new ArrayList<Message>();
94 *
95 * reader.beginArray();
96 * while (reader.hasNext()) {
97 * messages.add(readMessage(reader));
98 * }
99 * reader.endArray();
100 * return messages;
101 * }
102 *
103 * public Message readMessage(JsonReader reader) throws IOException {
104 * long id = -1;
105 * String text = null;
106 * User user = null;
107 * List<Double> geo = null;
108 *
109 * reader.beginObject();
110 * while (reader.hasNext()) {
111 * String name = reader.nextName();
112 * if (name.equals("id")) {
113 * id = reader.nextLong();
114 * } else if (name.equals("text")) {
115 * text = reader.nextString();
116 * } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
117 * geo = readDoublesArray(reader);
118 * } else if (name.equals("user")) {
119 * user = readUser(reader);
120 * } else {
121 * reader.skipValue();
122 * }
123 * }
124 * reader.endObject();
125 * return new Message(id, text, user, geo);
126 * }
127 *
128 * public List<Double> readDoublesArray(JsonReader reader) throws IOException {
129 * List<Double> doubles = new ArrayList<Double>();
130 *
131 * reader.beginArray();
132 * while (reader.hasNext()) {
133 * doubles.add(reader.nextDouble());
134 * }
135 * reader.endArray();
136 * return doubles;
137 * }
138 *
139 * public User readUser(JsonReader reader) throws IOException {
140 * String username = null;
141 * int followersCount = -1;
142 *
143 * reader.beginObject();
144 * while (reader.hasNext()) {
145 * String name = reader.nextName();
146 * if (name.equals("name")) {
147 * username = reader.nextString();
148 * } else if (name.equals("followers_count")) {
149 * followersCount = reader.nextInt();
150 * } else {
151 * reader.skipValue();
152 * }
153 * }
154 * reader.endObject();
155 * return new User(username, followersCount);
156 * }}</pre>
157 *
158 * <h3>Number Handling</h3>
159 * This reader permits numeric values to be read as strings and string values to
160 * be read as numbers. For example, both elements of the JSON array {@code
161 * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
162 * This behavior is intended to prevent lossy numeric conversions: double is
163 * JavaScript's only numeric type and very large values like {@code
164 * 9007199254740993} cannot be represented exactly on that platform. To minimize
165 * precision loss, extremely large values should be written and read as strings
166 * in JSON.
167 *
168 * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
169 * of this class are not thread safe.
170 */
171public final class JsonReader implements Closeable {
172
/**
 * Text of the boolean literal {@code true}. {@link #nextBoolean} tests the
 * pending value against this exact instance.
 */
private static final String TRUE = "true";

/** Text of the boolean literal {@code false}. */
private static final String FALSE = "false";

/** Source of the JSON characters. */
private final Reader in;

/** Whether input that violates the JSON spec is tolerated. Off by default. */
private boolean lenient = false;

/**
 * Hand-managed character buffer. Managing it ourselves makes it cheap to
 * read and un-read upcoming characters, and lets strings be created
 * without an intermediate StringBuilder. Literals are decoded directly out
 * of this buffer, so it must be at least as long as the longest token that
 * can be reported as a number.
 */
private final char[] buffer = new char[1024];

/** Index in {@code buffer} of the next character to consume. */
private int pos = 0;

/** Index in {@code buffer} one past the last character loaded from {@code in}. */
private int limit = 0;

/** Enclosing scopes; the last element is the innermost (current) scope. */
private final List<JsonScope> stack = new ArrayList<JsonScope>();
{
    // A fresh reader starts out before any document content.
    push(JsonScope.EMPTY_DOCUMENT);
}

/**
 * The type of the next token to be returned by {@link #peek} and {@link
 * #advance}. When this is null, peek() computes and assigns it.
 */
private JsonToken token;

/** The text of the next property name. */
private String name;

/*
 * The next literal value is held either as text in 'value', or as a
 * position and length within the buffer.
 */
private String value;
private int valuePos;
private int valueLength;

/** True while a skipValue() call is in progress. */
private boolean skipping = false;
216
/**
 * Creates a reader that pulls JSON-encoded text from {@code in}.
 *
 * @param in the character source; must not be null.
 * @throws NullPointerException if {@code in} is null.
 */
public JsonReader(Reader in) {
    // Reject a missing source immediately rather than on the first read.
    if (in == null) {
        throw new NullPointerException("in == null");
    }
    this.in = in;
}
226
227 /**
Jesse Wilson1ba41712010-08-06 16:08:59 -0700228 * Configure this parser to be be liberal in what it accepts. By default,
229 * this parser is strict and only accepts JSON as specified by <a
230 * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the
231 * parser to lenient causes it to ignore the following syntax errors:
232 *
233 * <ul>
234 * <li>End of line comments starting with {@code //} or {@code #} and
235 * ending with a newline character.
236 * <li>C-style comments starting with {@code /*} and ending with
237 * {@code *}{@code /}. Such comments may not be nested.
238 * <li>Names that are unquoted or {@code 'single quoted'}.
239 * <li>Strings that are unquoted or {@code 'single quoted'}.
240 * <li>Array elements separated by {@code ;} instead of {@code ,}.
241 * <li>Unnecessary array separators. These are interpreted as if null
242 * was the omitted value.
243 * <li>Names and values separated by {@code =} or {@code =>} instead of
244 * {@code :}.
245 * <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
246 * </ul>
247 */
248 public void setLenient(boolean lenient) {
249 this.lenient = lenient;
250 }
251
252 /**
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800253 * Returns true if this parser is liberal in what it accepts.
254 */
255 public boolean isLenient() {
256 return lenient;
257 }
258
259 /**
Jesse Wilson76d7e202010-08-03 17:55:09 -0700260 * Consumes the next token from the JSON stream and asserts that it is the
261 * beginning of a new array.
262 */
263 public void beginArray() throws IOException {
264 expect(JsonToken.BEGIN_ARRAY);
265 }
266
267 /**
268 * Consumes the next token from the JSON stream and asserts that it is the
269 * end of the current array.
270 */
271 public void endArray() throws IOException {
272 expect(JsonToken.END_ARRAY);
273 }
274
275 /**
276 * Consumes the next token from the JSON stream and asserts that it is the
277 * beginning of a new object.
278 */
279 public void beginObject() throws IOException {
280 expect(JsonToken.BEGIN_OBJECT);
281 }
282
283 /**
284 * Consumes the next token from the JSON stream and asserts that it is the
285 * end of the current array.
286 */
287 public void endObject() throws IOException {
288 expect(JsonToken.END_OBJECT);
289 }
290
291 /**
Jesse Wilson1ba41712010-08-06 16:08:59 -0700292 * Consumes {@code expected}.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700293 */
Jesse Wilson1ba41712010-08-06 16:08:59 -0700294 private void expect(JsonToken expected) throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800295 peek();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700296 if (token != expected) {
297 throw new IllegalStateException("Expected " + expected + " but was " + peek());
Jesse Wilson76d7e202010-08-03 17:55:09 -0700298 }
299 advance();
300 }
301
302 /**
303 * Returns true if the current array or object has another element.
304 */
305 public boolean hasNext() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800306 peek();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700307 return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700308 }
309
310 /**
311 * Returns the type of the next token without consuming it.
312 */
313 public JsonToken peek() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800314 if (token != null) {
315 return token;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700316 }
317
318 switch (peekStack()) {
319 case EMPTY_DOCUMENT:
320 replaceTop(JsonScope.NONEMPTY_DOCUMENT);
321 JsonToken firstToken = nextValue();
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800322 if (!lenient && token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700323 throw new IOException(
324 "Expected JSON document to start with '[' or '{' but was " + token);
325 }
326 return firstToken;
327 case EMPTY_ARRAY:
328 return nextInArray(true);
329 case NONEMPTY_ARRAY:
330 return nextInArray(false);
331 case EMPTY_OBJECT:
332 return nextInObject(true);
333 case DANGLING_NAME:
334 return objectValue();
335 case NONEMPTY_OBJECT:
336 return nextInObject(false);
337 case NONEMPTY_DOCUMENT:
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800338 try {
339 JsonToken token = nextValue();
340 if (lenient) {
341 return token;
342 }
343 throw syntaxError("Expected EOF");
344 } catch (EOFException e) {
345 return token = JsonToken.END_DOCUMENT; // TODO: avoid throwing here?
346 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700347 case CLOSED:
348 throw new IllegalStateException("JsonReader is closed");
349 default:
350 throw new AssertionError();
351 }
352 }
353
354 /**
355 * Advances the cursor in the JSON stream to the next token.
356 */
357 private JsonToken advance() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800358 peek();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700359
360 JsonToken result = token;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700361 token = null;
362 value = null;
363 name = null;
364 return result;
365 }
366
367 /**
368 * Returns the next token, a {@link JsonToken#NAME property name}, and
369 * consumes it.
370 *
371 * @throws IOException if the next token in the stream is not a property
372 * name.
373 */
374 public String nextName() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800375 peek();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700376 if (token != JsonToken.NAME) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700377 throw new IllegalStateException("Expected a name but was " + peek());
378 }
379 String result = name;
380 advance();
381 return result;
382 }
383
384 /**
385 * Returns the {@link JsonToken#STRING string} value of the next token,
386 * consuming it. If the next token is a number, this method will return its
387 * string form.
388 *
389 * @throws IllegalStateException if the next token is not a string or if
390 * this reader is closed.
391 */
392 public String nextString() throws IOException {
Jesse Wilson1ba41712010-08-06 16:08:59 -0700393 peek();
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800394 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700395 throw new IllegalStateException("Expected a string but was " + peek());
396 }
397
398 String result = value;
399 advance();
400 return result;
401 }
402
403 /**
404 * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token,
405 * consuming it.
406 *
407 * @throws IllegalStateException if the next token is not a boolean or if
408 * this reader is closed.
409 */
410 public boolean nextBoolean() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800411 peek();
412 if (token != JsonToken.BOOLEAN) {
413 throw new IllegalStateException("Expected a boolean but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700414 }
415
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800416 boolean result = (value == TRUE);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700417 advance();
418 return result;
419 }
420
421 /**
422 * Consumes the next token from the JSON stream and asserts that it is a
423 * literal null.
424 *
425 * @throws IllegalStateException if the next token is not null or if this
426 * reader is closed.
427 */
428 public void nextNull() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800429 peek();
430 if (token != JsonToken.NULL) {
431 throw new IllegalStateException("Expected null but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700432 }
433
434 advance();
435 }
436
437 /**
438 * Returns the {@link JsonToken#NUMBER double} value of the next token,
439 * consuming it. If the next token is a string, this method will attempt to
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800440 * parse it as a double using {@link Double#parseDouble(String)}.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700441 *
442 * @throws IllegalStateException if the next token is not a literal value.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700443 */
444 public double nextDouble() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800445 peek();
446 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
447 throw new IllegalStateException("Expected a double but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700448 }
449
450 double result = Double.parseDouble(value);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700451 advance();
452 return result;
453 }
454
455 /**
456 * Returns the {@link JsonToken#NUMBER long} value of the next token,
457 * consuming it. If the next token is a string, this method will attempt to
458 * parse it as a long. If the next token's numeric value cannot be exactly
459 * represented by a Java {@code long}, this method throws.
460 *
461 * @throws IllegalStateException if the next token is not a literal value.
462 * @throws NumberFormatException if the next literal value cannot be parsed
463 * as a number, or exactly represented as a long.
464 */
465 public long nextLong() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800466 peek();
467 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
468 throw new IllegalStateException("Expected a long but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700469 }
470
471 long result;
472 try {
473 result = Long.parseLong(value);
474 } catch (NumberFormatException ignored) {
475 double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
476 result = (long) asDouble;
477 if ((double) result != asDouble) {
478 throw new NumberFormatException(value);
479 }
480 }
481
Jesse Wilson76d7e202010-08-03 17:55:09 -0700482 advance();
483 return result;
484 }
485
486 /**
487 * Returns the {@link JsonToken#NUMBER int} value of the next token,
488 * consuming it. If the next token is a string, this method will attempt to
489 * parse it as an int. If the next token's numeric value cannot be exactly
490 * represented by a Java {@code int}, this method throws.
491 *
492 * @throws IllegalStateException if the next token is not a literal value.
493 * @throws NumberFormatException if the next literal value cannot be parsed
494 * as a number, or exactly represented as an int.
495 */
496 public int nextInt() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800497 peek();
498 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
499 throw new IllegalStateException("Expected an int but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700500 }
501
502 int result;
503 try {
504 result = Integer.parseInt(value);
505 } catch (NumberFormatException ignored) {
506 double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
507 result = (int) asDouble;
508 if ((double) result != asDouble) {
509 throw new NumberFormatException(value);
510 }
511 }
512
Jesse Wilson76d7e202010-08-03 17:55:09 -0700513 advance();
514 return result;
515 }
516
517 /**
518 * Closes this JSON reader and the underlying {@link Reader}.
519 */
520 public void close() throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700521 value = null;
522 token = null;
523 stack.clear();
524 stack.add(JsonScope.CLOSED);
525 in.close();
526 }
527
528 /**
529 * Skips the next value recursively. If it is an object or array, all nested
530 * elements are skipped. This method is intended for use when the JSON token
531 * stream contains unrecognized or unhandled values.
532 */
533 public void skipValue() throws IOException {
Jesse Wilsond07fb882010-08-06 19:30:04 -0700534 skipping = true;
535 try {
536 int count = 0;
537 do {
538 JsonToken token = advance();
539 if (token == JsonToken.BEGIN_ARRAY || token == JsonToken.BEGIN_OBJECT) {
540 count++;
541 } else if (token == JsonToken.END_ARRAY || token == JsonToken.END_OBJECT) {
542 count--;
543 }
544 } while (count != 0);
545 } finally {
546 skipping = false;
547 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700548 }
549
550 private JsonScope peekStack() {
551 return stack.get(stack.size() - 1);
552 }
553
554 private JsonScope pop() {
555 return stack.remove(stack.size() - 1);
556 }
557
558 private void push(JsonScope newTop) {
559 stack.add(newTop);
560 }
561
562 /**
563 * Replace the value on the top of the stack with the given value.
564 */
565 private void replaceTop(JsonScope newTop) {
566 stack.set(stack.size() - 1, newTop);
567 }
568
569 private JsonToken nextInArray(boolean firstElement) throws IOException {
570 if (firstElement) {
Jesse Wilson1ba41712010-08-06 16:08:59 -0700571 replaceTop(JsonScope.NONEMPTY_ARRAY);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700572 } else {
Jesse Wilson1ba41712010-08-06 16:08:59 -0700573 /* Look for a comma before each element after the first element. */
Jesse Wilson76d7e202010-08-03 17:55:09 -0700574 switch (nextNonWhitespace()) {
575 case ']':
576 pop();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700577 return token = JsonToken.END_ARRAY;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700578 case ';':
Jesse Wilson1ba41712010-08-06 16:08:59 -0700579 checkLenient(); // fall-through
580 case ',':
Jesse Wilson76d7e202010-08-03 17:55:09 -0700581 break;
582 default:
583 throw syntaxError("Unterminated array");
584 }
585 }
586
Jesse Wilson1ba41712010-08-06 16:08:59 -0700587 switch (nextNonWhitespace()) {
588 case ']':
589 if (firstElement) {
590 pop();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700591 return token = JsonToken.END_ARRAY;
592 }
593 // fall-through to handle ",]"
594 case ';':
595 case ',':
596 /* In lenient mode, a 0-length literal means 'null' */
597 checkLenient();
598 pos--;
Jesse Wilson1ba41712010-08-06 16:08:59 -0700599 value = "null";
600 return token = JsonToken.NULL;
601 default:
602 pos--;
603 return nextValue();
604 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700605 }
606
607 private JsonToken nextInObject(boolean firstElement) throws IOException {
608 /*
609 * Read delimiters. Either a comma/semicolon separating this and the
610 * previous name-value pair, or a close brace to denote the end of the
611 * object.
612 */
613 if (firstElement) {
614 /* Peek to see if this is the empty object. */
615 switch (nextNonWhitespace()) {
616 case '}':
617 pop();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700618 return token = JsonToken.END_OBJECT;
619 default:
620 pos--;
621 }
622 } else {
623 switch (nextNonWhitespace()) {
624 case '}':
625 pop();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700626 return token = JsonToken.END_OBJECT;
627 case ';':
628 case ',':
629 break;
630 default:
631 throw syntaxError("Unterminated object");
632 }
633 }
634
635 /* Read the name. */
636 int quote = nextNonWhitespace();
637 switch (quote) {
638 case '\'':
Jesse Wilson1ba41712010-08-06 16:08:59 -0700639 checkLenient(); // fall-through
Jesse Wilson76d7e202010-08-03 17:55:09 -0700640 case '"':
641 name = nextString((char) quote);
642 break;
643 default:
Jesse Wilson1ba41712010-08-06 16:08:59 -0700644 checkLenient();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700645 pos--;
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800646 name = nextLiteral(false);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700647 if (name.isEmpty()) {
648 throw syntaxError("Expected name");
649 }
650 }
651
652 replaceTop(JsonScope.DANGLING_NAME);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700653 return token = JsonToken.NAME;
654 }
655
656 private JsonToken objectValue() throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700657 /*
Jesse Wilson1ba41712010-08-06 16:08:59 -0700658 * Read the name/value separator. Usually a colon ':'. In lenient mode
659 * we also accept an equals sign '=', or an arrow "=>".
Jesse Wilson76d7e202010-08-03 17:55:09 -0700660 */
Jesse Wilson1ba41712010-08-06 16:08:59 -0700661 switch (nextNonWhitespace()) {
662 case ':':
663 break;
664 case '=':
665 checkLenient();
666 if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') {
667 pos++;
668 }
669 break;
670 default:
671 throw syntaxError("Expected ':'");
Jesse Wilson76d7e202010-08-03 17:55:09 -0700672 }
673
674 replaceTop(JsonScope.NONEMPTY_OBJECT);
675 return nextValue();
676 }
677
678 private JsonToken nextValue() throws IOException {
679 int c = nextNonWhitespace();
680 switch (c) {
681 case '{':
682 push(JsonScope.EMPTY_OBJECT);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700683 return token = JsonToken.BEGIN_OBJECT;
684
685 case '[':
686 push(JsonScope.EMPTY_ARRAY);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700687 return token = JsonToken.BEGIN_ARRAY;
688
689 case '\'':
Jesse Wilson1ba41712010-08-06 16:08:59 -0700690 checkLenient(); // fall-through
Jesse Wilson76d7e202010-08-03 17:55:09 -0700691 case '"':
692 value = nextString((char) c);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700693 return token = JsonToken.STRING;
694
695 default:
696 pos--;
697 return readLiteral();
698 }
699 }
700
701 /**
702 * Returns true once {@code limit - pos >= minimum}. If the data is
703 * exhausted before that many characters are available, this returns
704 * false.
705 */
706 private boolean fillBuffer(int minimum) throws IOException {
707 if (limit != pos) {
708 limit -= pos;
709 System.arraycopy(buffer, pos, buffer, 0, limit);
710 } else {
711 limit = 0;
712 }
713
714 pos = 0;
715 int total;
716 while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
717 limit += total;
718 if (limit >= minimum) {
719 return true;
720 }
721 }
722 return false;
723 }
724
725 private int nextNonWhitespace() throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700726 while (pos < limit || fillBuffer(1)) {
727 int c = buffer[pos++];
728 switch (c) {
729 case '\t':
730 case ' ':
731 case '\n':
732 case '\r':
733 continue;
734
735 case '/':
736 if (pos == limit && !fillBuffer(1)) {
737 return c;
738 }
739
Jesse Wilson1ba41712010-08-06 16:08:59 -0700740 checkLenient();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700741 char peek = buffer[pos];
742 switch (peek) {
743 case '*':
744 // skip a /* c-style comment */
745 pos++;
746 if (!skipTo("*/")) {
747 throw syntaxError("Unterminated comment");
748 }
749 pos += 2;
750 continue;
751
752 case '/':
753 // skip a // end-of-line comment
754 pos++;
755 skipToEndOfLine();
756 continue;
757
758 default:
759 return c;
760 }
761
762 case '#':
763 /*
764 * Skip a # hash end-of-line comment. The JSON RFC doesn't
765 * specify this behaviour, but it's required to parse
766 * existing documents. See http://b/2571423.
767 */
Jesse Wilson1ba41712010-08-06 16:08:59 -0700768 checkLenient();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700769 skipToEndOfLine();
770 continue;
771
772 default:
773 return c;
774 }
775 }
776
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800777 throw new EOFException("End of input");
Jesse Wilson76d7e202010-08-03 17:55:09 -0700778 }
779
Jesse Wilson1ba41712010-08-06 16:08:59 -0700780 private void checkLenient() throws IOException {
781 if (!lenient) {
782 throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
783 }
784 }
785
Jesse Wilson76d7e202010-08-03 17:55:09 -0700786 /**
787 * Advances the position until after the next newline character. If the line
788 * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
789 * caller.
790 */
791 private void skipToEndOfLine() throws IOException {
792 while (pos < limit || fillBuffer(1)) {
793 char c = buffer[pos++];
794 if (c == '\r' || c == '\n') {
795 break;
796 }
797 }
798 }
799
800 private boolean skipTo(String toFind) throws IOException {
801 outer:
802 for (; pos + toFind.length() < limit || fillBuffer(toFind.length()); pos++) {
803 for (int c = 0; c < toFind.length(); c++) {
804 if (buffer[pos + c] != toFind.charAt(c)) {
805 continue outer;
806 }
807 }
808 return true;
809 }
810 return false;
811 }
812
813 /**
814 * Returns the string up to but not including {@code quote}, unescaping any
815 * character escape sequences encountered along the way. The opening quote
816 * should have already been read. This consumes the closing quote, but does
817 * not include it in the returned string.
818 *
819 * @param quote either ' or ".
820 * @throws NumberFormatException if any unicode escape sequences are
821 * malformed.
822 */
823 private String nextString(char quote) throws IOException {
824 StringBuilder builder = null;
825 do {
826 /* the index of the first character not yet appended to the builder. */
827 int start = pos;
828 while (pos < limit) {
829 int c = buffer[pos++];
830
831 if (c == quote) {
Jesse Wilsond07fb882010-08-06 19:30:04 -0700832 if (skipping) {
833 return "skipped!";
834 } else if (builder == null) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700835 return new String(buffer, start, pos - start - 1);
836 } else {
837 builder.append(buffer, start, pos - start - 1);
838 return builder.toString();
839 }
840
841 } else if (c == '\\') {
842 if (builder == null) {
843 builder = new StringBuilder();
844 }
845 builder.append(buffer, start, pos - start - 1);
846 builder.append(readEscapeCharacter());
847 start = pos;
848 }
849 }
850
851 if (builder == null) {
852 builder = new StringBuilder();
853 }
854 builder.append(buffer, start, pos - start);
855 } while (fillBuffer(1));
856
857 throw syntaxError("Unterminated string");
858 }
859
860 /**
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800861 * Reads the value up to but not including any delimiter characters. This
Jesse Wilson76d7e202010-08-03 17:55:09 -0700862 * does not consume the delimiter character.
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800863 *
864 * @param assignOffsetsOnly true for this method to only set the valuePos
865 * and valueLength fields and return a null result. This only works if
866 * the literal is short; a string is returned otherwise.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700867 */
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800868 private String nextLiteral(boolean assignOffsetsOnly) throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700869 StringBuilder builder = null;
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800870 valuePos = -1;
871 valueLength = 0;
872 int i = 0;
Jesse Wilson1ba41712010-08-06 16:08:59 -0700873
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800874 findNonLiteralCharacter:
875 while (true) {
876 for (; pos + i < limit; i++) {
877 switch (buffer[pos + i]) {
878 case '/':
879 case '\\':
880 case ';':
881 case '#':
882 case '=':
883 checkLenient(); // fall-through
884 case '{':
885 case '}':
886 case '[':
887 case ']':
888 case ':':
889 case ',':
890 case ' ':
891 case '\t':
892 case '\f':
893 case '\r':
894 case '\n':
895 break findNonLiteralCharacter;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700896 }
897 }
898
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800899 /*
900 * Attempt to load the entire literal into the buffer at once. If
901 * we run out of input, add a non-literal character at the end so
902 * that decoding doesn't need to do bounds checks.
903 */
904 if (i < buffer.length) {
905 if (fillBuffer(i + 1)) {
906 continue;
907 } else {
908 buffer[limit] = '\0';
909 break;
910 }
911 }
912
913 // use a StringBuilder when the value is too long. It must be an unquoted string.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700914 if (builder == null) {
915 builder = new StringBuilder();
916 }
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800917 builder.append(buffer, pos, i);
918 valueLength += i;
919 pos += i;
920 i = 0;
921 if (!fillBuffer(1)) {
922 break;
923 }
924 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700925
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800926 String result;
927 if (assignOffsetsOnly && builder == null) {
928 valuePos = pos;
929 result = null;
930 } else if (skipping) {
931 result = "skipped!";
932 } else if (builder == null) {
933 result = new String(buffer, pos, i);
934 } else {
935 builder.append(buffer, pos, i);
936 result = builder.toString();
937 }
938 valueLength += i;
939 pos += i;
940 return result;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700941 }
942
943 @Override public String toString() {
944 return getClass().getSimpleName() + " near " + getSnippet();
945 }
946
947 /**
948 * Unescapes the character identified by the character or characters that
949 * immediately follow a backslash. The backslash '\' should have already
950 * been read. This supports both unicode escapes "u000A" and two-character
951 * escapes "\n".
952 *
953 * @throws NumberFormatException if any unicode escape sequences are
954 * malformed.
955 */
956 private char readEscapeCharacter() throws IOException {
957 if (pos == limit && !fillBuffer(1)) {
958 throw syntaxError("Unterminated escape sequence");
959 }
960
961 char escaped = buffer[pos++];
962 switch (escaped) {
963 case 'u':
964 if (pos + 4 > limit && !fillBuffer(4)) {
965 throw syntaxError("Unterminated escape sequence");
966 }
967 String hex = new String(buffer, pos, 4);
968 pos += 4;
969 return (char) Integer.parseInt(hex, 16);
970
971 case 't':
972 return '\t';
973
974 case 'b':
975 return '\b';
976
977 case 'n':
978 return '\n';
979
980 case 'r':
981 return '\r';
982
983 case 'f':
984 return '\f';
985
986 case '\'':
987 case '"':
988 case '\\':
989 default:
990 return escaped;
991 }
992 }
993
994 /**
995 * Reads a null, boolean, numeric or unquoted string literal value.
996 */
997 private JsonToken readLiteral() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800998 value = nextLiteral(true);
999 if (valueLength == 0) {
Jesse Wilson76d7e202010-08-03 17:55:09 -07001000 throw syntaxError("Expected literal value");
1001 }
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001002 token = decodeLiteral();
1003 if (token == JsonToken.STRING) {
1004 checkLenient();
1005 }
1006 return token;
Jesse Wilson76d7e202010-08-03 17:55:09 -07001007 }
1008
1009 /**
1010 * Assigns {@code nextToken} based on the value of {@code nextValue}.
1011 */
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001012 private JsonToken decodeLiteral() throws IOException {
1013 if (valuePos == -1) {
1014 // it was too long to fit in the buffer so it can only be a string
1015 return JsonToken.STRING;
1016 } else if (valueLength == 4
1017 && ('n' == buffer[valuePos ] || 'N' == buffer[valuePos ])
1018 && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1])
1019 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1020 && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) {
1021 value = "null";
1022 return JsonToken.NULL;
1023 } else if (valueLength == 4
1024 && ('t' == buffer[valuePos ] || 'T' == buffer[valuePos ])
1025 && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1])
1026 && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2])
1027 && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) {
1028 value = TRUE;
1029 return JsonToken.BOOLEAN;
1030 } else if (valueLength == 5
1031 && ('f' == buffer[valuePos ] || 'F' == buffer[valuePos ])
1032 && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1])
1033 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1034 && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3])
1035 && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) {
1036 value = FALSE;
1037 return JsonToken.BOOLEAN;
Jesse Wilson76d7e202010-08-03 17:55:09 -07001038 } else {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001039 value = new String(buffer, valuePos, valueLength);
1040 return decodeNumber(buffer, valuePos, valueLength);
1041 }
1042 }
1043
1044 /**
1045 * Determine whether the characters is a JSON number. Numbers are of the
1046 * form -12.34e+56. Fractional and exponential parts are optional. Leading
1047 * zeroes are not allowed in the value or exponential part, but are allowed
1048 * in the fraction.
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001049 */
1050 private JsonToken decodeNumber(char[] chars, int offset, int length) {
1051 int i = offset;
1052 int c = chars[i];
1053
1054 if (c == '-') {
1055 c = chars[++i];
1056 }
1057
1058 if (c == '0') {
1059 c = chars[++i];
1060 } else if (c >= '1' && c <= '9') {
1061 c = chars[++i];
1062 while (c >= '0' && c <= '9') {
1063 c = chars[++i];
Jesse Wilson76d7e202010-08-03 17:55:09 -07001064 }
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001065 } else {
1066 return JsonToken.STRING;
1067 }
1068
1069 if (c == '.') {
1070 c = chars[++i];
1071 while (c >= '0' && c <= '9') {
1072 c = chars[++i];
1073 }
1074 }
1075
1076 if (c == 'e' || c == 'E') {
1077 c = chars[++i];
1078 if (c == '+' || c == '-') {
1079 c = chars[++i];
1080 }
1081 if (c >= '0' && c <= '9') {
1082 c = chars[++i];
1083 while (c >= '0' && c <= '9') {
1084 c = chars[++i];
1085 }
1086 } else {
1087 return JsonToken.STRING;
1088 }
1089 }
1090
1091 if (i == offset + length) {
1092 return JsonToken.NUMBER;
1093 } else {
1094 return JsonToken.STRING;
Jesse Wilson76d7e202010-08-03 17:55:09 -07001095 }
1096 }
1097
1098 /**
1099 * Throws a new IO exception with the given message and a context snippet
1100 * with this reader's content.
1101 */
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -08001102 private IOException syntaxError(String message) throws IOException {
1103 throw new MalformedJsonException(message + " near " + getSnippet());
Jesse Wilson76d7e202010-08-03 17:55:09 -07001104 }
1105
1106 private CharSequence getSnippet() {
1107 StringBuilder snippet = new StringBuilder();
1108 int beforePos = Math.min(pos, 20);
1109 snippet.append(buffer, pos - beforePos, beforePos);
1110 int afterPos = Math.min(limit - pos, 20);
1111 snippet.append(buffer, pos, afterPos);
1112 return snippet;
1113 }
Jesse Wilson76d7e202010-08-03 17:55:09 -07001114}