blob: 132b5953fa6881c2bdcd5b054c53b47027edc130 [file] [log] [blame]
Jesse Wilson76d7e202010-08-03 17:55:09 -07001/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.util;
18
Jesse Wilson847cf342011-04-21 11:28:31 -070019import java.io.Closeable;
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -080020import java.io.EOFException;
Jesse Wilson76d7e202010-08-03 17:55:09 -070021import java.io.IOException;
22import java.io.Reader;
Jesse Wilson76d7e202010-08-03 17:55:09 -070023import java.util.ArrayList;
24import java.util.List;
Jesse Wilson847cf342011-04-21 11:28:31 -070025import libcore.internal.StringPool;
Jesse Wilson76d7e202010-08-03 17:55:09 -070026
27/**
28 * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
29 * encoded value as a stream of tokens. This stream includes both literal
30 * values (strings, numbers, booleans, and nulls) as well as the begin and
31 * end delimiters of objects and arrays. The tokens are traversed in
32 * depth-first order, the same order that they appear in the JSON document.
33 * Within JSON objects, name/value pairs are represented by a single token.
34 *
35 * <h3>Parsing JSON</h3>
Jesse Wilson3312b292010-10-15 17:33:54 -070036 * To create a recursive descent parser for your own JSON streams, first create
37 * an entry point method that creates a {@code JsonReader}.
Jesse Wilson76d7e202010-08-03 17:55:09 -070038 *
39 * <p>Next, create handler methods for each structure in your JSON text. You'll
40 * need a method for each object type and for each array type.
41 * <ul>
42 * <li>Within <strong>array handling</strong> methods, first call {@link
43 * #beginArray} to consume the array's opening bracket. Then create a
44 * while loop that accumulates values, terminating when {@link #hasNext}
45 * is false. Finally, read the array's closing bracket by calling {@link
46 * #endArray}.
47 * <li>Within <strong>object handling</strong> methods, first call {@link
48 * #beginObject} to consume the object's opening brace. Then create a
49 * while loop that assigns values to local variables based on their name.
50 * This loop should terminate when {@link #hasNext} is false. Finally,
51 * read the object's closing brace by calling {@link #endObject}.
52 * </ul>
53 * <p>When a nested object or array is encountered, delegate to the
54 * corresponding handler method.
55 *
56 * <p>When an unknown name is encountered, strict parsers should fail with an
57 * exception. Lenient parsers should call {@link #skipValue()} to recursively
58 * skip the value's nested tokens, which may otherwise conflict.
59 *
60 * <p>If a value may be null, you should first check using {@link #peek()}.
61 * Null literals can be consumed using either {@link #nextNull()} or {@link
62 * #skipValue()}.
63 *
64 * <h3>Example</h3>
65 * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
66 * [
67 * {
68 * "id": 912345678901,
69 * "text": "How do I read JSON on Android?",
70 * "geo": null,
71 * "user": {
72 * "name": "android_newb",
73 * "followers_count": 41
74 * }
75 * },
76 * {
77 * "id": 912345678902,
78 * "text": "@android_newb just use android.util.JsonReader!",
79 * "geo": [50.454722, -104.606667],
80 * "user": {
81 * "name": "jesse",
82 * "followers_count": 2
83 * }
84 * }
85 * ]}</pre>
86 * This code implements the parser for the above structure: <pre> {@code
87 *
88 * public List<Message> readJsonStream(InputStream in) throws IOException {
89 * JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
Jesse Wilson9d30ea02011-03-16 15:02:48 -070090 * try {
91 * return readMessagesArray(reader);
92 * } finally {
93 * reader.close();
94 * }
Jesse Wilson76d7e202010-08-03 17:55:09 -070095 * }
96 *
97 * public List<Message> readMessagesArray(JsonReader reader) throws IOException {
98 * List<Message> messages = new ArrayList<Message>();
99 *
100 * reader.beginArray();
101 * while (reader.hasNext()) {
102 * messages.add(readMessage(reader));
103 * }
104 * reader.endArray();
105 * return messages;
106 * }
107 *
108 * public Message readMessage(JsonReader reader) throws IOException {
109 * long id = -1;
110 * String text = null;
111 * User user = null;
112 * List<Double> geo = null;
113 *
114 * reader.beginObject();
115 * while (reader.hasNext()) {
116 * String name = reader.nextName();
117 * if (name.equals("id")) {
118 * id = reader.nextLong();
119 * } else if (name.equals("text")) {
120 * text = reader.nextString();
121 * } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
122 * geo = readDoublesArray(reader);
123 * } else if (name.equals("user")) {
124 * user = readUser(reader);
125 * } else {
126 * reader.skipValue();
127 * }
128 * }
129 * reader.endObject();
130 * return new Message(id, text, user, geo);
131 * }
132 *
133 * public List<Double> readDoublesArray(JsonReader reader) throws IOException {
134 * List<Double> doubles = new ArrayList<Double>();
135 *
136 * reader.beginArray();
137 * while (reader.hasNext()) {
138 * doubles.add(reader.nextDouble());
139 * }
140 * reader.endArray();
141 * return doubles;
142 * }
143 *
144 * public User readUser(JsonReader reader) throws IOException {
145 * String username = null;
146 * int followersCount = -1;
147 *
148 * reader.beginObject();
149 * while (reader.hasNext()) {
150 * String name = reader.nextName();
151 * if (name.equals("name")) {
152 * username = reader.nextString();
153 * } else if (name.equals("followers_count")) {
154 * followersCount = reader.nextInt();
155 * } else {
156 * reader.skipValue();
157 * }
158 * }
159 * reader.endObject();
160 * return new User(username, followersCount);
161 * }}</pre>
162 *
163 * <h3>Number Handling</h3>
164 * This reader permits numeric values to be read as strings and string values to
165 * be read as numbers. For example, both elements of the JSON array {@code
166 * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
167 * This behavior is intended to prevent lossy numeric conversions: double is
168 * JavaScript's only numeric type and very large values like {@code
169 * 9007199254740993} cannot be represented exactly on that platform. To minimize
170 * precision loss, extremely large values should be written and read as strings
171 * in JSON.
172 *
173 * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
174 * of this class are not thread safe.
175 */
176public final class JsonReader implements Closeable {
177
/**
 * Canonical text of the {@code true} literal. {@code nextBoolean()} compares the pending
 * value against this constant by identity.
 */
private static final String TRUE = "true";

/** Canonical text of the {@code false} literal. */
private static final String FALSE = "false";

/** Supplies the strings that are cut directly out of {@link #buffer}. */
private final StringPool stringPool = new StringPool();

/** The input JSON. */
private final Reader in;

/** True to accept non-spec compliant JSON. Strict unless changed via setLenient(). */
private boolean lenient;

/**
 * Use a manual buffer to easily read and unread upcoming characters, and
 * also so we can create strings without an intermediate StringBuilder.
 * We decode literals directly out of this buffer, so it must be at least as
 * long as the longest token that can be reported as a number.
 */
private final char[] buffer = new char[1024];

/** Index in {@link #buffer} of the next character to read. */
private int pos;

/** Index one past the last valid character in {@link #buffer}. */
private int limit;

/** Stack of enclosing scopes; the innermost scope is the last element. */
private final List<JsonScope> stack = new ArrayList<JsonScope>();
{
    // Every reader starts out positioned before any document content.
    push(JsonScope.EMPTY_DOCUMENT);
}

/**
 * The type of the next token to be returned by {@link #peek} and {@link
 * #advance}. If null, peek() will assign a value.
 */
private JsonToken token;

/** The text of the next name. */
private String name;

/*
 * For the next literal value, we may have the text value, or the position
 * and length in the buffer.
 */
private String value;
private int valuePos;
private int valueLength;

/** True if we're currently handling a skipValue() call. */
private boolean skipping;
223
/**
 * Creates a new instance that reads a JSON-encoded stream from {@code in}.
 *
 * @param in the character source to parse; must not be null
 * @throws NullPointerException if {@code in} is null
 */
public JsonReader(Reader in) {
    if (null == in) {
        throw new NullPointerException("in == null");
    }
    this.in = in;
}
233
234 /**
Jesse Wilson1ba41712010-08-06 16:08:59 -0700235 * Configure this parser to be be liberal in what it accepts. By default,
236 * this parser is strict and only accepts JSON as specified by <a
237 * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the
238 * parser to lenient causes it to ignore the following syntax errors:
239 *
240 * <ul>
241 * <li>End of line comments starting with {@code //} or {@code #} and
242 * ending with a newline character.
243 * <li>C-style comments starting with {@code /*} and ending with
244 * {@code *}{@code /}. Such comments may not be nested.
245 * <li>Names that are unquoted or {@code 'single quoted'}.
246 * <li>Strings that are unquoted or {@code 'single quoted'}.
247 * <li>Array elements separated by {@code ;} instead of {@code ,}.
248 * <li>Unnecessary array separators. These are interpreted as if null
249 * was the omitted value.
250 * <li>Names and values separated by {@code =} or {@code =>} instead of
251 * {@code :}.
252 * <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
253 * </ul>
254 */
255 public void setLenient(boolean lenient) {
256 this.lenient = lenient;
257 }
258
259 /**
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800260 * Returns true if this parser is liberal in what it accepts.
261 */
262 public boolean isLenient() {
263 return lenient;
264 }
265
266 /**
Jesse Wilson76d7e202010-08-03 17:55:09 -0700267 * Consumes the next token from the JSON stream and asserts that it is the
268 * beginning of a new array.
269 */
270 public void beginArray() throws IOException {
271 expect(JsonToken.BEGIN_ARRAY);
272 }
273
274 /**
275 * Consumes the next token from the JSON stream and asserts that it is the
276 * end of the current array.
277 */
278 public void endArray() throws IOException {
279 expect(JsonToken.END_ARRAY);
280 }
281
282 /**
283 * Consumes the next token from the JSON stream and asserts that it is the
284 * beginning of a new object.
285 */
286 public void beginObject() throws IOException {
287 expect(JsonToken.BEGIN_OBJECT);
288 }
289
290 /**
291 * Consumes the next token from the JSON stream and asserts that it is the
292 * end of the current array.
293 */
294 public void endObject() throws IOException {
295 expect(JsonToken.END_OBJECT);
296 }
297
298 /**
Jesse Wilson1ba41712010-08-06 16:08:59 -0700299 * Consumes {@code expected}.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700300 */
Jesse Wilson1ba41712010-08-06 16:08:59 -0700301 private void expect(JsonToken expected) throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800302 peek();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700303 if (token != expected) {
304 throw new IllegalStateException("Expected " + expected + " but was " + peek());
Jesse Wilson76d7e202010-08-03 17:55:09 -0700305 }
306 advance();
307 }
308
309 /**
310 * Returns true if the current array or object has another element.
311 */
312 public boolean hasNext() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800313 peek();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700314 return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700315 }
316
317 /**
318 * Returns the type of the next token without consuming it.
319 */
320 public JsonToken peek() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800321 if (token != null) {
322 return token;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700323 }
324
325 switch (peekStack()) {
326 case EMPTY_DOCUMENT:
327 replaceTop(JsonScope.NONEMPTY_DOCUMENT);
328 JsonToken firstToken = nextValue();
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800329 if (!lenient && token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700330 throw new IOException(
331 "Expected JSON document to start with '[' or '{' but was " + token);
332 }
333 return firstToken;
334 case EMPTY_ARRAY:
335 return nextInArray(true);
336 case NONEMPTY_ARRAY:
337 return nextInArray(false);
338 case EMPTY_OBJECT:
339 return nextInObject(true);
340 case DANGLING_NAME:
341 return objectValue();
342 case NONEMPTY_OBJECT:
343 return nextInObject(false);
344 case NONEMPTY_DOCUMENT:
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800345 try {
346 JsonToken token = nextValue();
347 if (lenient) {
348 return token;
349 }
350 throw syntaxError("Expected EOF");
351 } catch (EOFException e) {
352 return token = JsonToken.END_DOCUMENT; // TODO: avoid throwing here?
353 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700354 case CLOSED:
355 throw new IllegalStateException("JsonReader is closed");
356 default:
357 throw new AssertionError();
358 }
359 }
360
361 /**
362 * Advances the cursor in the JSON stream to the next token.
363 */
364 private JsonToken advance() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800365 peek();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700366
367 JsonToken result = token;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700368 token = null;
369 value = null;
370 name = null;
371 return result;
372 }
373
374 /**
375 * Returns the next token, a {@link JsonToken#NAME property name}, and
376 * consumes it.
377 *
378 * @throws IOException if the next token in the stream is not a property
379 * name.
380 */
381 public String nextName() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800382 peek();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700383 if (token != JsonToken.NAME) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700384 throw new IllegalStateException("Expected a name but was " + peek());
385 }
386 String result = name;
387 advance();
388 return result;
389 }
390
391 /**
392 * Returns the {@link JsonToken#STRING string} value of the next token,
393 * consuming it. If the next token is a number, this method will return its
394 * string form.
395 *
396 * @throws IllegalStateException if the next token is not a string or if
397 * this reader is closed.
398 */
399 public String nextString() throws IOException {
Jesse Wilson1ba41712010-08-06 16:08:59 -0700400 peek();
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800401 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700402 throw new IllegalStateException("Expected a string but was " + peek());
403 }
404
405 String result = value;
406 advance();
407 return result;
408 }
409
410 /**
411 * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token,
412 * consuming it.
413 *
414 * @throws IllegalStateException if the next token is not a boolean or if
415 * this reader is closed.
416 */
417 public boolean nextBoolean() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800418 peek();
419 if (token != JsonToken.BOOLEAN) {
420 throw new IllegalStateException("Expected a boolean but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700421 }
422
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800423 boolean result = (value == TRUE);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700424 advance();
425 return result;
426 }
427
428 /**
429 * Consumes the next token from the JSON stream and asserts that it is a
430 * literal null.
431 *
432 * @throws IllegalStateException if the next token is not null or if this
433 * reader is closed.
434 */
435 public void nextNull() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800436 peek();
437 if (token != JsonToken.NULL) {
438 throw new IllegalStateException("Expected null but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700439 }
440
441 advance();
442 }
443
444 /**
445 * Returns the {@link JsonToken#NUMBER double} value of the next token,
446 * consuming it. If the next token is a string, this method will attempt to
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800447 * parse it as a double using {@link Double#parseDouble(String)}.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700448 *
449 * @throws IllegalStateException if the next token is not a literal value.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700450 */
451 public double nextDouble() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800452 peek();
453 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
454 throw new IllegalStateException("Expected a double but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700455 }
456
457 double result = Double.parseDouble(value);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700458 advance();
459 return result;
460 }
461
462 /**
463 * Returns the {@link JsonToken#NUMBER long} value of the next token,
464 * consuming it. If the next token is a string, this method will attempt to
465 * parse it as a long. If the next token's numeric value cannot be exactly
466 * represented by a Java {@code long}, this method throws.
467 *
468 * @throws IllegalStateException if the next token is not a literal value.
469 * @throws NumberFormatException if the next literal value cannot be parsed
470 * as a number, or exactly represented as a long.
471 */
472 public long nextLong() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800473 peek();
474 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
475 throw new IllegalStateException("Expected a long but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700476 }
477
478 long result;
479 try {
480 result = Long.parseLong(value);
481 } catch (NumberFormatException ignored) {
482 double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
483 result = (long) asDouble;
484 if ((double) result != asDouble) {
485 throw new NumberFormatException(value);
486 }
487 }
488
Jesse Wilson76d7e202010-08-03 17:55:09 -0700489 advance();
490 return result;
491 }
492
493 /**
494 * Returns the {@link JsonToken#NUMBER int} value of the next token,
495 * consuming it. If the next token is a string, this method will attempt to
496 * parse it as an int. If the next token's numeric value cannot be exactly
497 * represented by a Java {@code int}, this method throws.
498 *
499 * @throws IllegalStateException if the next token is not a literal value.
500 * @throws NumberFormatException if the next literal value cannot be parsed
501 * as a number, or exactly represented as an int.
502 */
503 public int nextInt() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800504 peek();
505 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
506 throw new IllegalStateException("Expected an int but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700507 }
508
509 int result;
510 try {
511 result = Integer.parseInt(value);
512 } catch (NumberFormatException ignored) {
513 double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
514 result = (int) asDouble;
515 if ((double) result != asDouble) {
516 throw new NumberFormatException(value);
517 }
518 }
519
Jesse Wilson76d7e202010-08-03 17:55:09 -0700520 advance();
521 return result;
522 }
523
524 /**
525 * Closes this JSON reader and the underlying {@link Reader}.
526 */
527 public void close() throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700528 value = null;
529 token = null;
530 stack.clear();
531 stack.add(JsonScope.CLOSED);
532 in.close();
533 }
534
535 /**
536 * Skips the next value recursively. If it is an object or array, all nested
537 * elements are skipped. This method is intended for use when the JSON token
538 * stream contains unrecognized or unhandled values.
539 */
540 public void skipValue() throws IOException {
Jesse Wilsond07fb882010-08-06 19:30:04 -0700541 skipping = true;
542 try {
543 int count = 0;
544 do {
545 JsonToken token = advance();
546 if (token == JsonToken.BEGIN_ARRAY || token == JsonToken.BEGIN_OBJECT) {
547 count++;
548 } else if (token == JsonToken.END_ARRAY || token == JsonToken.END_OBJECT) {
549 count--;
550 }
551 } while (count != 0);
552 } finally {
553 skipping = false;
554 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700555 }
556
557 private JsonScope peekStack() {
558 return stack.get(stack.size() - 1);
559 }
560
561 private JsonScope pop() {
562 return stack.remove(stack.size() - 1);
563 }
564
565 private void push(JsonScope newTop) {
566 stack.add(newTop);
567 }
568
569 /**
570 * Replace the value on the top of the stack with the given value.
571 */
572 private void replaceTop(JsonScope newTop) {
573 stack.set(stack.size() - 1, newTop);
574 }
575
576 private JsonToken nextInArray(boolean firstElement) throws IOException {
577 if (firstElement) {
Jesse Wilson1ba41712010-08-06 16:08:59 -0700578 replaceTop(JsonScope.NONEMPTY_ARRAY);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700579 } else {
Jesse Wilson1ba41712010-08-06 16:08:59 -0700580 /* Look for a comma before each element after the first element. */
Jesse Wilson76d7e202010-08-03 17:55:09 -0700581 switch (nextNonWhitespace()) {
582 case ']':
583 pop();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700584 return token = JsonToken.END_ARRAY;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700585 case ';':
Jesse Wilson1ba41712010-08-06 16:08:59 -0700586 checkLenient(); // fall-through
587 case ',':
Jesse Wilson76d7e202010-08-03 17:55:09 -0700588 break;
589 default:
590 throw syntaxError("Unterminated array");
591 }
592 }
593
Jesse Wilson1ba41712010-08-06 16:08:59 -0700594 switch (nextNonWhitespace()) {
595 case ']':
596 if (firstElement) {
597 pop();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700598 return token = JsonToken.END_ARRAY;
599 }
600 // fall-through to handle ",]"
601 case ';':
602 case ',':
603 /* In lenient mode, a 0-length literal means 'null' */
604 checkLenient();
605 pos--;
Jesse Wilson1ba41712010-08-06 16:08:59 -0700606 value = "null";
607 return token = JsonToken.NULL;
608 default:
609 pos--;
610 return nextValue();
611 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700612 }
613
614 private JsonToken nextInObject(boolean firstElement) throws IOException {
615 /*
616 * Read delimiters. Either a comma/semicolon separating this and the
617 * previous name-value pair, or a close brace to denote the end of the
618 * object.
619 */
620 if (firstElement) {
621 /* Peek to see if this is the empty object. */
622 switch (nextNonWhitespace()) {
623 case '}':
624 pop();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700625 return token = JsonToken.END_OBJECT;
626 default:
627 pos--;
628 }
629 } else {
630 switch (nextNonWhitespace()) {
631 case '}':
632 pop();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700633 return token = JsonToken.END_OBJECT;
634 case ';':
635 case ',':
636 break;
637 default:
638 throw syntaxError("Unterminated object");
639 }
640 }
641
642 /* Read the name. */
643 int quote = nextNonWhitespace();
644 switch (quote) {
645 case '\'':
Jesse Wilson1ba41712010-08-06 16:08:59 -0700646 checkLenient(); // fall-through
Jesse Wilson76d7e202010-08-03 17:55:09 -0700647 case '"':
648 name = nextString((char) quote);
649 break;
650 default:
Jesse Wilson1ba41712010-08-06 16:08:59 -0700651 checkLenient();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700652 pos--;
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800653 name = nextLiteral(false);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700654 if (name.isEmpty()) {
655 throw syntaxError("Expected name");
656 }
657 }
658
659 replaceTop(JsonScope.DANGLING_NAME);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700660 return token = JsonToken.NAME;
661 }
662
663 private JsonToken objectValue() throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700664 /*
Jesse Wilson1ba41712010-08-06 16:08:59 -0700665 * Read the name/value separator. Usually a colon ':'. In lenient mode
666 * we also accept an equals sign '=', or an arrow "=>".
Jesse Wilson76d7e202010-08-03 17:55:09 -0700667 */
Jesse Wilson1ba41712010-08-06 16:08:59 -0700668 switch (nextNonWhitespace()) {
669 case ':':
670 break;
671 case '=':
672 checkLenient();
673 if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') {
674 pos++;
675 }
676 break;
677 default:
678 throw syntaxError("Expected ':'");
Jesse Wilson76d7e202010-08-03 17:55:09 -0700679 }
680
681 replaceTop(JsonScope.NONEMPTY_OBJECT);
682 return nextValue();
683 }
684
685 private JsonToken nextValue() throws IOException {
686 int c = nextNonWhitespace();
687 switch (c) {
688 case '{':
689 push(JsonScope.EMPTY_OBJECT);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700690 return token = JsonToken.BEGIN_OBJECT;
691
692 case '[':
693 push(JsonScope.EMPTY_ARRAY);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700694 return token = JsonToken.BEGIN_ARRAY;
695
696 case '\'':
Jesse Wilson1ba41712010-08-06 16:08:59 -0700697 checkLenient(); // fall-through
Jesse Wilson76d7e202010-08-03 17:55:09 -0700698 case '"':
699 value = nextString((char) c);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700700 return token = JsonToken.STRING;
701
702 default:
703 pos--;
704 return readLiteral();
705 }
706 }
707
708 /**
709 * Returns true once {@code limit - pos >= minimum}. If the data is
710 * exhausted before that many characters are available, this returns
711 * false.
712 */
713 private boolean fillBuffer(int minimum) throws IOException {
714 if (limit != pos) {
715 limit -= pos;
716 System.arraycopy(buffer, pos, buffer, 0, limit);
717 } else {
718 limit = 0;
719 }
720
721 pos = 0;
722 int total;
723 while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
724 limit += total;
725 if (limit >= minimum) {
726 return true;
727 }
728 }
729 return false;
730 }
731
732 private int nextNonWhitespace() throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700733 while (pos < limit || fillBuffer(1)) {
734 int c = buffer[pos++];
735 switch (c) {
736 case '\t':
737 case ' ':
738 case '\n':
739 case '\r':
740 continue;
741
742 case '/':
743 if (pos == limit && !fillBuffer(1)) {
744 return c;
745 }
746
Jesse Wilson1ba41712010-08-06 16:08:59 -0700747 checkLenient();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700748 char peek = buffer[pos];
749 switch (peek) {
750 case '*':
751 // skip a /* c-style comment */
752 pos++;
753 if (!skipTo("*/")) {
754 throw syntaxError("Unterminated comment");
755 }
756 pos += 2;
757 continue;
758
759 case '/':
760 // skip a // end-of-line comment
761 pos++;
762 skipToEndOfLine();
763 continue;
764
765 default:
766 return c;
767 }
768
769 case '#':
770 /*
771 * Skip a # hash end-of-line comment. The JSON RFC doesn't
772 * specify this behaviour, but it's required to parse
773 * existing documents. See http://b/2571423.
774 */
Jesse Wilson1ba41712010-08-06 16:08:59 -0700775 checkLenient();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700776 skipToEndOfLine();
777 continue;
778
779 default:
780 return c;
781 }
782 }
783
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800784 throw new EOFException("End of input");
Jesse Wilson76d7e202010-08-03 17:55:09 -0700785 }
786
Jesse Wilson1ba41712010-08-06 16:08:59 -0700787 private void checkLenient() throws IOException {
788 if (!lenient) {
789 throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
790 }
791 }
792
Jesse Wilson76d7e202010-08-03 17:55:09 -0700793 /**
794 * Advances the position until after the next newline character. If the line
795 * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
796 * caller.
797 */
798 private void skipToEndOfLine() throws IOException {
799 while (pos < limit || fillBuffer(1)) {
800 char c = buffer[pos++];
801 if (c == '\r' || c == '\n') {
802 break;
803 }
804 }
805 }
806
807 private boolean skipTo(String toFind) throws IOException {
808 outer:
809 for (; pos + toFind.length() < limit || fillBuffer(toFind.length()); pos++) {
810 for (int c = 0; c < toFind.length(); c++) {
811 if (buffer[pos + c] != toFind.charAt(c)) {
812 continue outer;
813 }
814 }
815 return true;
816 }
817 return false;
818 }
819
820 /**
821 * Returns the string up to but not including {@code quote}, unescaping any
822 * character escape sequences encountered along the way. The opening quote
823 * should have already been read. This consumes the closing quote, but does
824 * not include it in the returned string.
825 *
826 * @param quote either ' or ".
827 * @throws NumberFormatException if any unicode escape sequences are
828 * malformed.
829 */
830 private String nextString(char quote) throws IOException {
831 StringBuilder builder = null;
832 do {
833 /* the index of the first character not yet appended to the builder. */
834 int start = pos;
835 while (pos < limit) {
836 int c = buffer[pos++];
837
838 if (c == quote) {
Jesse Wilsond07fb882010-08-06 19:30:04 -0700839 if (skipping) {
840 return "skipped!";
841 } else if (builder == null) {
Jesse Wilson847cf342011-04-21 11:28:31 -0700842 return stringPool.get(buffer, start, pos - start - 1);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700843 } else {
844 builder.append(buffer, start, pos - start - 1);
845 return builder.toString();
846 }
847
848 } else if (c == '\\') {
849 if (builder == null) {
850 builder = new StringBuilder();
851 }
852 builder.append(buffer, start, pos - start - 1);
853 builder.append(readEscapeCharacter());
854 start = pos;
855 }
856 }
857
858 if (builder == null) {
859 builder = new StringBuilder();
860 }
861 builder.append(buffer, start, pos - start);
862 } while (fillBuffer(1));
863
864 throw syntaxError("Unterminated string");
865 }
866
867 /**
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800868 * Reads the value up to but not including any delimiter characters. This
Jesse Wilson76d7e202010-08-03 17:55:09 -0700869 * does not consume the delimiter character.
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800870 *
871 * @param assignOffsetsOnly true for this method to only set the valuePos
872 * and valueLength fields and return a null result. This only works if
873 * the literal is short; a string is returned otherwise.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700874 */
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800875 private String nextLiteral(boolean assignOffsetsOnly) throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700876 StringBuilder builder = null;
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800877 valuePos = -1;
878 valueLength = 0;
879 int i = 0;
Jesse Wilson1ba41712010-08-06 16:08:59 -0700880
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800881 findNonLiteralCharacter:
882 while (true) {
883 for (; pos + i < limit; i++) {
884 switch (buffer[pos + i]) {
885 case '/':
886 case '\\':
887 case ';':
888 case '#':
889 case '=':
890 checkLenient(); // fall-through
891 case '{':
892 case '}':
893 case '[':
894 case ']':
895 case ':':
896 case ',':
897 case ' ':
898 case '\t':
899 case '\f':
900 case '\r':
901 case '\n':
902 break findNonLiteralCharacter;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700903 }
904 }
905
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800906 /*
907 * Attempt to load the entire literal into the buffer at once. If
908 * we run out of input, add a non-literal character at the end so
909 * that decoding doesn't need to do bounds checks.
910 */
911 if (i < buffer.length) {
912 if (fillBuffer(i + 1)) {
913 continue;
914 } else {
915 buffer[limit] = '\0';
916 break;
917 }
918 }
919
920 // use a StringBuilder when the value is too long. It must be an unquoted string.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700921 if (builder == null) {
922 builder = new StringBuilder();
923 }
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800924 builder.append(buffer, pos, i);
925 valueLength += i;
926 pos += i;
927 i = 0;
928 if (!fillBuffer(1)) {
929 break;
930 }
931 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700932
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800933 String result;
934 if (assignOffsetsOnly && builder == null) {
935 valuePos = pos;
936 result = null;
937 } else if (skipping) {
938 result = "skipped!";
939 } else if (builder == null) {
Jesse Wilson847cf342011-04-21 11:28:31 -0700940 result = stringPool.get(buffer, pos, i);
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800941 } else {
942 builder.append(buffer, pos, i);
943 result = builder.toString();
944 }
945 valueLength += i;
946 pos += i;
947 return result;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700948 }
949
950 @Override public String toString() {
951 return getClass().getSimpleName() + " near " + getSnippet();
952 }
953
954 /**
955 * Unescapes the character identified by the character or characters that
956 * immediately follow a backslash. The backslash '\' should have already
957 * been read. This supports both unicode escapes "u000A" and two-character
958 * escapes "\n".
959 *
960 * @throws NumberFormatException if any unicode escape sequences are
961 * malformed.
962 */
963 private char readEscapeCharacter() throws IOException {
964 if (pos == limit && !fillBuffer(1)) {
965 throw syntaxError("Unterminated escape sequence");
966 }
967
968 char escaped = buffer[pos++];
969 switch (escaped) {
970 case 'u':
971 if (pos + 4 > limit && !fillBuffer(4)) {
972 throw syntaxError("Unterminated escape sequence");
973 }
Jesse Wilson847cf342011-04-21 11:28:31 -0700974 String hex = stringPool.get(buffer, pos, 4);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700975 pos += 4;
976 return (char) Integer.parseInt(hex, 16);
977
978 case 't':
979 return '\t';
980
981 case 'b':
982 return '\b';
983
984 case 'n':
985 return '\n';
986
987 case 'r':
988 return '\r';
989
990 case 'f':
991 return '\f';
992
993 case '\'':
994 case '"':
995 case '\\':
996 default:
997 return escaped;
998 }
999 }
1000
1001 /**
1002 * Reads a null, boolean, numeric or unquoted string literal value.
1003 */
1004 private JsonToken readLiteral() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001005 value = nextLiteral(true);
1006 if (valueLength == 0) {
Jesse Wilson76d7e202010-08-03 17:55:09 -07001007 throw syntaxError("Expected literal value");
1008 }
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001009 token = decodeLiteral();
1010 if (token == JsonToken.STRING) {
1011 checkLenient();
1012 }
1013 return token;
Jesse Wilson76d7e202010-08-03 17:55:09 -07001014 }
1015
1016 /**
1017 * Assigns {@code nextToken} based on the value of {@code nextValue}.
1018 */
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001019 private JsonToken decodeLiteral() throws IOException {
1020 if (valuePos == -1) {
1021 // it was too long to fit in the buffer so it can only be a string
1022 return JsonToken.STRING;
1023 } else if (valueLength == 4
1024 && ('n' == buffer[valuePos ] || 'N' == buffer[valuePos ])
1025 && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1])
1026 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1027 && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) {
1028 value = "null";
1029 return JsonToken.NULL;
1030 } else if (valueLength == 4
1031 && ('t' == buffer[valuePos ] || 'T' == buffer[valuePos ])
1032 && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1])
1033 && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2])
1034 && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) {
1035 value = TRUE;
1036 return JsonToken.BOOLEAN;
1037 } else if (valueLength == 5
1038 && ('f' == buffer[valuePos ] || 'F' == buffer[valuePos ])
1039 && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1])
1040 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1041 && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3])
1042 && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) {
1043 value = FALSE;
1044 return JsonToken.BOOLEAN;
Jesse Wilson76d7e202010-08-03 17:55:09 -07001045 } else {
Jesse Wilson847cf342011-04-21 11:28:31 -07001046 value = stringPool.get(buffer, valuePos, valueLength);
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001047 return decodeNumber(buffer, valuePos, valueLength);
1048 }
1049 }
1050
1051 /**
1052 * Determine whether the characters is a JSON number. Numbers are of the
1053 * form -12.34e+56. Fractional and exponential parts are optional. Leading
1054 * zeroes are not allowed in the value or exponential part, but are allowed
1055 * in the fraction.
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001056 */
1057 private JsonToken decodeNumber(char[] chars, int offset, int length) {
1058 int i = offset;
1059 int c = chars[i];
1060
1061 if (c == '-') {
1062 c = chars[++i];
1063 }
1064
1065 if (c == '0') {
1066 c = chars[++i];
1067 } else if (c >= '1' && c <= '9') {
1068 c = chars[++i];
1069 while (c >= '0' && c <= '9') {
1070 c = chars[++i];
Jesse Wilson76d7e202010-08-03 17:55:09 -07001071 }
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001072 } else {
1073 return JsonToken.STRING;
1074 }
1075
1076 if (c == '.') {
1077 c = chars[++i];
1078 while (c >= '0' && c <= '9') {
1079 c = chars[++i];
1080 }
1081 }
1082
1083 if (c == 'e' || c == 'E') {
1084 c = chars[++i];
1085 if (c == '+' || c == '-') {
1086 c = chars[++i];
1087 }
1088 if (c >= '0' && c <= '9') {
1089 c = chars[++i];
1090 while (c >= '0' && c <= '9') {
1091 c = chars[++i];
1092 }
1093 } else {
1094 return JsonToken.STRING;
1095 }
1096 }
1097
1098 if (i == offset + length) {
1099 return JsonToken.NUMBER;
1100 } else {
1101 return JsonToken.STRING;
Jesse Wilson76d7e202010-08-03 17:55:09 -07001102 }
1103 }
1104
1105 /**
1106 * Throws a new IO exception with the given message and a context snippet
1107 * with this reader's content.
1108 */
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -08001109 private IOException syntaxError(String message) throws IOException {
1110 throw new MalformedJsonException(message + " near " + getSnippet());
Jesse Wilson76d7e202010-08-03 17:55:09 -07001111 }
1112
1113 private CharSequence getSnippet() {
1114 StringBuilder snippet = new StringBuilder();
1115 int beforePos = Math.min(pos, 20);
1116 snippet.append(buffer, pos - beforePos, beforePos);
1117 int afterPos = Math.min(limit - pos, 20);
1118 snippet.append(buffer, pos, afterPos);
1119 return snippet;
1120 }
Jesse Wilson76d7e202010-08-03 17:55:09 -07001121}