blob: 7d1c6c4918db5ec64d7d690f1b95884ec9ac2395 [file] [log] [blame]
Jesse Wilson76d7e202010-08-03 17:55:09 -07001/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.util;
18
Jesse Wilson847cf342011-04-21 11:28:31 -070019import java.io.Closeable;
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -080020import java.io.EOFException;
Jesse Wilson76d7e202010-08-03 17:55:09 -070021import java.io.IOException;
22import java.io.Reader;
Jesse Wilson76d7e202010-08-03 17:55:09 -070023import java.util.ArrayList;
24import java.util.List;
Jesse Wilson847cf342011-04-21 11:28:31 -070025import libcore.internal.StringPool;
Jesse Wilson76d7e202010-08-03 17:55:09 -070026
27/**
28 * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
29 * encoded value as a stream of tokens. This stream includes both literal
30 * values (strings, numbers, booleans, and nulls) as well as the begin and
31 * end delimiters of objects and arrays. The tokens are traversed in
32 * depth-first order, the same order that they appear in the JSON document.
33 * Within JSON objects, name/value pairs are represented by a single token.
34 *
35 * <h3>Parsing JSON</h3>
Jesse Wilson3312b292010-10-15 17:33:54 -070036 * To create a recursive descent parser for your own JSON streams, first create
37 * an entry point method that creates a {@code JsonReader}.
Jesse Wilson76d7e202010-08-03 17:55:09 -070038 *
39 * <p>Next, create handler methods for each structure in your JSON text. You'll
40 * need a method for each object type and for each array type.
41 * <ul>
42 * <li>Within <strong>array handling</strong> methods, first call {@link
43 * #beginArray} to consume the array's opening bracket. Then create a
44 * while loop that accumulates values, terminating when {@link #hasNext}
45 * is false. Finally, read the array's closing bracket by calling {@link
46 * #endArray}.
47 * <li>Within <strong>object handling</strong> methods, first call {@link
48 * #beginObject} to consume the object's opening brace. Then create a
49 * while loop that assigns values to local variables based on their name.
50 * This loop should terminate when {@link #hasNext} is false. Finally,
51 * read the object's closing brace by calling {@link #endObject}.
52 * </ul>
53 * <p>When a nested object or array is encountered, delegate to the
54 * corresponding handler method.
55 *
56 * <p>When an unknown name is encountered, strict parsers should fail with an
57 * exception. Lenient parsers should call {@link #skipValue()} to recursively
58 * skip the value's nested tokens, which may otherwise conflict.
59 *
60 * <p>If a value may be null, you should first check using {@link #peek()}.
61 * Null literals can be consumed using either {@link #nextNull()} or {@link
62 * #skipValue()}.
63 *
64 * <h3>Example</h3>
65 * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
66 * [
67 * {
68 * "id": 912345678901,
69 * "text": "How do I read JSON on Android?",
70 * "geo": null,
71 * "user": {
72 * "name": "android_newb",
73 * "followers_count": 41
74 * }
75 * },
76 * {
77 * "id": 912345678902,
78 * "text": "@android_newb just use android.util.JsonReader!",
79 * "geo": [50.454722, -104.606667],
80 * "user": {
81 * "name": "jesse",
82 * "followers_count": 2
83 * }
84 * }
85 * ]}</pre>
86 * This code implements the parser for the above structure: <pre> {@code
87 *
88 * public List<Message> readJsonStream(InputStream in) throws IOException {
89 * JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
Jesse Wilson9d30ea02011-03-16 15:02:48 -070090 * try {
91 * return readMessagesArray(reader);
92 * } finally {
93 * reader.close();
94 * }
Jesse Wilson76d7e202010-08-03 17:55:09 -070095 * }
96 *
97 * public List<Message> readMessagesArray(JsonReader reader) throws IOException {
98 * List<Message> messages = new ArrayList<Message>();
99 *
100 * reader.beginArray();
101 * while (reader.hasNext()) {
102 * messages.add(readMessage(reader));
103 * }
104 * reader.endArray();
105 * return messages;
106 * }
107 *
108 * public Message readMessage(JsonReader reader) throws IOException {
109 * long id = -1;
110 * String text = null;
111 * User user = null;
112 * List<Double> geo = null;
113 *
114 * reader.beginObject();
115 * while (reader.hasNext()) {
116 * String name = reader.nextName();
117 * if (name.equals("id")) {
118 * id = reader.nextLong();
119 * } else if (name.equals("text")) {
120 * text = reader.nextString();
121 * } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
122 * geo = readDoublesArray(reader);
123 * } else if (name.equals("user")) {
124 * user = readUser(reader);
125 * } else {
126 * reader.skipValue();
127 * }
128 * }
129 * reader.endObject();
130 * return new Message(id, text, user, geo);
131 * }
132 *
133 * public List<Double> readDoublesArray(JsonReader reader) throws IOException {
134 * List<Double> doubles = new ArrayList<Double>();
135 *
136 * reader.beginArray();
137 * while (reader.hasNext()) {
138 * doubles.add(reader.nextDouble());
139 * }
140 * reader.endArray();
141 * return doubles;
142 * }
143 *
144 * public User readUser(JsonReader reader) throws IOException {
145 * String username = null;
146 * int followersCount = -1;
147 *
148 * reader.beginObject();
149 * while (reader.hasNext()) {
150 * String name = reader.nextName();
151 * if (name.equals("name")) {
152 * username = reader.nextString();
153 * } else if (name.equals("followers_count")) {
154 * followersCount = reader.nextInt();
155 * } else {
156 * reader.skipValue();
157 * }
158 * }
159 * reader.endObject();
160 * return new User(username, followersCount);
161 * }}</pre>
162 *
163 * <h3>Number Handling</h3>
164 * This reader permits numeric values to be read as strings and string values to
165 * be read as numbers. For example, both elements of the JSON array {@code
166 * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
167 * This behavior is intended to prevent lossy numeric conversions: double is
168 * JavaScript's only numeric type and very large values like {@code
169 * 9007199254740993} cannot be represented exactly on that platform. To minimize
170 * precision loss, extremely large values should be written and read as strings
171 * in JSON.
172 *
173 * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
174 * of this class are not thread safe.
175 */
176public final class JsonReader implements Closeable {
177
/** Text of the {@code true} literal; boolean decoding compares against this instance. */
private static final String TRUE = "true";
/** Text of the {@code false} literal. */
private static final String FALSE = "false";

/** Produces strings from character ranges of {@link #buffer}. */
private final StringPool stringPool = new StringPool();

/** The input JSON. */
private final Reader in;

/** True to accept JSON that does not comply with the specification. */
private boolean lenient = false;

/**
 * A hand-managed buffer. It lets upcoming characters be read and unread
 * cheaply and lets strings be created without an intermediate
 * StringBuilder. Literals are decoded directly out of this array, so it
 * must be at least as long as the longest token that can be reported as a
 * number.
 */
private final char[] buffer = new char[1024];
/** Index of the next character to consume from the buffer. */
private int pos = 0;
/** Index one past the last valid character in the buffer. */
private int limit = 0;

/*
 * The line and column of the first character in the buffer.
 */
private int bufferStartLine = 1;
private int bufferStartColumn = 1;

/** The nesting scopes currently open; the last element is the innermost. */
private final List<JsonScope> stack = new ArrayList<JsonScope>();
{
    push(JsonScope.EMPTY_DOCUMENT);
}

/**
 * The type of the next token to be returned by {@link #peek} and {@link
 * #advance}. If null, peek() will assign a value.
 */
private JsonToken token;

/** The text of the next name. */
private String name;

/*
 * For the next literal value, we may have the text value, or the position
 * and length in the buffer.
 */
private String value;
private int valuePos;
private int valueLength;

/** True while a skipValue() call is in progress. */
private boolean skipping = false;
229
/**
 * Creates a reader that consumes a JSON-encoded stream from {@code in}.
 *
 * @param in the character source to parse
 * @throws NullPointerException if {@code in} is null
 */
public JsonReader(Reader in) {
    if (null == in) {
        throw new NullPointerException("in == null");
    }
    this.in = in;
}
239
240 /**
Jesse Wilson1ba41712010-08-06 16:08:59 -0700241 * Configure this parser to be be liberal in what it accepts. By default,
242 * this parser is strict and only accepts JSON as specified by <a
243 * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the
244 * parser to lenient causes it to ignore the following syntax errors:
245 *
246 * <ul>
247 * <li>End of line comments starting with {@code //} or {@code #} and
248 * ending with a newline character.
249 * <li>C-style comments starting with {@code /*} and ending with
250 * {@code *}{@code /}. Such comments may not be nested.
251 * <li>Names that are unquoted or {@code 'single quoted'}.
252 * <li>Strings that are unquoted or {@code 'single quoted'}.
253 * <li>Array elements separated by {@code ;} instead of {@code ,}.
254 * <li>Unnecessary array separators. These are interpreted as if null
255 * was the omitted value.
256 * <li>Names and values separated by {@code =} or {@code =>} instead of
257 * {@code :}.
258 * <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
259 * </ul>
260 */
261 public void setLenient(boolean lenient) {
262 this.lenient = lenient;
263 }
264
265 /**
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800266 * Returns true if this parser is liberal in what it accepts.
267 */
268 public boolean isLenient() {
269 return lenient;
270 }
271
272 /**
Jesse Wilson76d7e202010-08-03 17:55:09 -0700273 * Consumes the next token from the JSON stream and asserts that it is the
274 * beginning of a new array.
275 */
276 public void beginArray() throws IOException {
277 expect(JsonToken.BEGIN_ARRAY);
278 }
279
280 /**
281 * Consumes the next token from the JSON stream and asserts that it is the
282 * end of the current array.
283 */
284 public void endArray() throws IOException {
285 expect(JsonToken.END_ARRAY);
286 }
287
288 /**
289 * Consumes the next token from the JSON stream and asserts that it is the
290 * beginning of a new object.
291 */
292 public void beginObject() throws IOException {
293 expect(JsonToken.BEGIN_OBJECT);
294 }
295
296 /**
297 * Consumes the next token from the JSON stream and asserts that it is the
298 * end of the current array.
299 */
300 public void endObject() throws IOException {
301 expect(JsonToken.END_OBJECT);
302 }
303
304 /**
Jesse Wilson1ba41712010-08-06 16:08:59 -0700305 * Consumes {@code expected}.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700306 */
Jesse Wilson1ba41712010-08-06 16:08:59 -0700307 private void expect(JsonToken expected) throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800308 peek();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700309 if (token != expected) {
310 throw new IllegalStateException("Expected " + expected + " but was " + peek());
Jesse Wilson76d7e202010-08-03 17:55:09 -0700311 }
312 advance();
313 }
314
315 /**
316 * Returns true if the current array or object has another element.
317 */
318 public boolean hasNext() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800319 peek();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700320 return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700321 }
322
323 /**
324 * Returns the type of the next token without consuming it.
325 */
326 public JsonToken peek() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800327 if (token != null) {
328 return token;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700329 }
330
331 switch (peekStack()) {
332 case EMPTY_DOCUMENT:
333 replaceTop(JsonScope.NONEMPTY_DOCUMENT);
334 JsonToken firstToken = nextValue();
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800335 if (!lenient && token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700336 throw new IOException(
337 "Expected JSON document to start with '[' or '{' but was " + token);
338 }
339 return firstToken;
340 case EMPTY_ARRAY:
341 return nextInArray(true);
342 case NONEMPTY_ARRAY:
343 return nextInArray(false);
344 case EMPTY_OBJECT:
345 return nextInObject(true);
346 case DANGLING_NAME:
347 return objectValue();
348 case NONEMPTY_OBJECT:
349 return nextInObject(false);
350 case NONEMPTY_DOCUMENT:
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800351 try {
352 JsonToken token = nextValue();
353 if (lenient) {
354 return token;
355 }
356 throw syntaxError("Expected EOF");
357 } catch (EOFException e) {
358 return token = JsonToken.END_DOCUMENT; // TODO: avoid throwing here?
359 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700360 case CLOSED:
361 throw new IllegalStateException("JsonReader is closed");
362 default:
363 throw new AssertionError();
364 }
365 }
366
367 /**
368 * Advances the cursor in the JSON stream to the next token.
369 */
370 private JsonToken advance() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800371 peek();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700372
373 JsonToken result = token;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700374 token = null;
375 value = null;
376 name = null;
377 return result;
378 }
379
380 /**
381 * Returns the next token, a {@link JsonToken#NAME property name}, and
382 * consumes it.
383 *
384 * @throws IOException if the next token in the stream is not a property
385 * name.
386 */
387 public String nextName() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800388 peek();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700389 if (token != JsonToken.NAME) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700390 throw new IllegalStateException("Expected a name but was " + peek());
391 }
392 String result = name;
393 advance();
394 return result;
395 }
396
397 /**
398 * Returns the {@link JsonToken#STRING string} value of the next token,
399 * consuming it. If the next token is a number, this method will return its
400 * string form.
401 *
402 * @throws IllegalStateException if the next token is not a string or if
403 * this reader is closed.
404 */
405 public String nextString() throws IOException {
Jesse Wilson1ba41712010-08-06 16:08:59 -0700406 peek();
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800407 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700408 throw new IllegalStateException("Expected a string but was " + peek());
409 }
410
411 String result = value;
412 advance();
413 return result;
414 }
415
416 /**
417 * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token,
418 * consuming it.
419 *
420 * @throws IllegalStateException if the next token is not a boolean or if
421 * this reader is closed.
422 */
423 public boolean nextBoolean() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800424 peek();
425 if (token != JsonToken.BOOLEAN) {
426 throw new IllegalStateException("Expected a boolean but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700427 }
428
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800429 boolean result = (value == TRUE);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700430 advance();
431 return result;
432 }
433
434 /**
435 * Consumes the next token from the JSON stream and asserts that it is a
436 * literal null.
437 *
438 * @throws IllegalStateException if the next token is not null or if this
439 * reader is closed.
440 */
441 public void nextNull() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800442 peek();
443 if (token != JsonToken.NULL) {
444 throw new IllegalStateException("Expected null but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700445 }
446
447 advance();
448 }
449
450 /**
451 * Returns the {@link JsonToken#NUMBER double} value of the next token,
452 * consuming it. If the next token is a string, this method will attempt to
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800453 * parse it as a double using {@link Double#parseDouble(String)}.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700454 *
455 * @throws IllegalStateException if the next token is not a literal value.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700456 */
457 public double nextDouble() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800458 peek();
459 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
460 throw new IllegalStateException("Expected a double but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700461 }
462
463 double result = Double.parseDouble(value);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700464 advance();
465 return result;
466 }
467
468 /**
469 * Returns the {@link JsonToken#NUMBER long} value of the next token,
470 * consuming it. If the next token is a string, this method will attempt to
471 * parse it as a long. If the next token's numeric value cannot be exactly
472 * represented by a Java {@code long}, this method throws.
473 *
474 * @throws IllegalStateException if the next token is not a literal value.
475 * @throws NumberFormatException if the next literal value cannot be parsed
476 * as a number, or exactly represented as a long.
477 */
478 public long nextLong() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800479 peek();
480 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
481 throw new IllegalStateException("Expected a long but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700482 }
483
484 long result;
485 try {
486 result = Long.parseLong(value);
487 } catch (NumberFormatException ignored) {
488 double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
489 result = (long) asDouble;
490 if ((double) result != asDouble) {
491 throw new NumberFormatException(value);
492 }
493 }
494
Jesse Wilson76d7e202010-08-03 17:55:09 -0700495 advance();
496 return result;
497 }
498
499 /**
500 * Returns the {@link JsonToken#NUMBER int} value of the next token,
501 * consuming it. If the next token is a string, this method will attempt to
502 * parse it as an int. If the next token's numeric value cannot be exactly
503 * represented by a Java {@code int}, this method throws.
504 *
505 * @throws IllegalStateException if the next token is not a literal value.
506 * @throws NumberFormatException if the next literal value cannot be parsed
507 * as a number, or exactly represented as an int.
508 */
509 public int nextInt() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800510 peek();
511 if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
512 throw new IllegalStateException("Expected an int but was " + token);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700513 }
514
515 int result;
516 try {
517 result = Integer.parseInt(value);
518 } catch (NumberFormatException ignored) {
519 double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
520 result = (int) asDouble;
521 if ((double) result != asDouble) {
522 throw new NumberFormatException(value);
523 }
524 }
525
Jesse Wilson76d7e202010-08-03 17:55:09 -0700526 advance();
527 return result;
528 }
529
530 /**
531 * Closes this JSON reader and the underlying {@link Reader}.
532 */
533 public void close() throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700534 value = null;
535 token = null;
536 stack.clear();
537 stack.add(JsonScope.CLOSED);
538 in.close();
539 }
540
541 /**
542 * Skips the next value recursively. If it is an object or array, all nested
543 * elements are skipped. This method is intended for use when the JSON token
544 * stream contains unrecognized or unhandled values.
545 */
546 public void skipValue() throws IOException {
Jesse Wilsond07fb882010-08-06 19:30:04 -0700547 skipping = true;
548 try {
Calin Juravle8fbcc6b2014-02-18 19:08:39 +0000549 if (!hasNext() || peek() == JsonToken.END_DOCUMENT) {
550 throw new IllegalStateException("No element left to skip");
551 }
Jesse Wilsond07fb882010-08-06 19:30:04 -0700552 int count = 0;
553 do {
554 JsonToken token = advance();
555 if (token == JsonToken.BEGIN_ARRAY || token == JsonToken.BEGIN_OBJECT) {
556 count++;
557 } else if (token == JsonToken.END_ARRAY || token == JsonToken.END_OBJECT) {
558 count--;
559 }
560 } while (count != 0);
561 } finally {
562 skipping = false;
563 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700564 }
565
566 private JsonScope peekStack() {
567 return stack.get(stack.size() - 1);
568 }
569
570 private JsonScope pop() {
571 return stack.remove(stack.size() - 1);
572 }
573
574 private void push(JsonScope newTop) {
575 stack.add(newTop);
576 }
577
578 /**
579 * Replace the value on the top of the stack with the given value.
580 */
581 private void replaceTop(JsonScope newTop) {
582 stack.set(stack.size() - 1, newTop);
583 }
584
585 private JsonToken nextInArray(boolean firstElement) throws IOException {
586 if (firstElement) {
Jesse Wilson1ba41712010-08-06 16:08:59 -0700587 replaceTop(JsonScope.NONEMPTY_ARRAY);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700588 } else {
Jesse Wilson1ba41712010-08-06 16:08:59 -0700589 /* Look for a comma before each element after the first element. */
Jesse Wilson76d7e202010-08-03 17:55:09 -0700590 switch (nextNonWhitespace()) {
591 case ']':
592 pop();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700593 return token = JsonToken.END_ARRAY;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700594 case ';':
Jesse Wilson1ba41712010-08-06 16:08:59 -0700595 checkLenient(); // fall-through
596 case ',':
Jesse Wilson76d7e202010-08-03 17:55:09 -0700597 break;
598 default:
599 throw syntaxError("Unterminated array");
600 }
601 }
602
Jesse Wilson1ba41712010-08-06 16:08:59 -0700603 switch (nextNonWhitespace()) {
604 case ']':
605 if (firstElement) {
606 pop();
Jesse Wilson1ba41712010-08-06 16:08:59 -0700607 return token = JsonToken.END_ARRAY;
608 }
609 // fall-through to handle ",]"
610 case ';':
611 case ',':
612 /* In lenient mode, a 0-length literal means 'null' */
613 checkLenient();
614 pos--;
Jesse Wilson1ba41712010-08-06 16:08:59 -0700615 value = "null";
616 return token = JsonToken.NULL;
617 default:
618 pos--;
619 return nextValue();
620 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700621 }
622
623 private JsonToken nextInObject(boolean firstElement) throws IOException {
624 /*
625 * Read delimiters. Either a comma/semicolon separating this and the
626 * previous name-value pair, or a close brace to denote the end of the
627 * object.
628 */
629 if (firstElement) {
630 /* Peek to see if this is the empty object. */
631 switch (nextNonWhitespace()) {
632 case '}':
633 pop();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700634 return token = JsonToken.END_OBJECT;
635 default:
636 pos--;
637 }
638 } else {
639 switch (nextNonWhitespace()) {
640 case '}':
641 pop();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700642 return token = JsonToken.END_OBJECT;
643 case ';':
644 case ',':
645 break;
646 default:
647 throw syntaxError("Unterminated object");
648 }
649 }
650
651 /* Read the name. */
652 int quote = nextNonWhitespace();
653 switch (quote) {
654 case '\'':
Jesse Wilson1ba41712010-08-06 16:08:59 -0700655 checkLenient(); // fall-through
Jesse Wilson76d7e202010-08-03 17:55:09 -0700656 case '"':
657 name = nextString((char) quote);
658 break;
659 default:
Jesse Wilson1ba41712010-08-06 16:08:59 -0700660 checkLenient();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700661 pos--;
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800662 name = nextLiteral(false);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700663 if (name.isEmpty()) {
664 throw syntaxError("Expected name");
665 }
666 }
667
668 replaceTop(JsonScope.DANGLING_NAME);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700669 return token = JsonToken.NAME;
670 }
671
672 private JsonToken objectValue() throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700673 /*
Jesse Wilson1ba41712010-08-06 16:08:59 -0700674 * Read the name/value separator. Usually a colon ':'. In lenient mode
675 * we also accept an equals sign '=', or an arrow "=>".
Jesse Wilson76d7e202010-08-03 17:55:09 -0700676 */
Jesse Wilson1ba41712010-08-06 16:08:59 -0700677 switch (nextNonWhitespace()) {
678 case ':':
679 break;
680 case '=':
681 checkLenient();
682 if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') {
683 pos++;
684 }
685 break;
686 default:
687 throw syntaxError("Expected ':'");
Jesse Wilson76d7e202010-08-03 17:55:09 -0700688 }
689
690 replaceTop(JsonScope.NONEMPTY_OBJECT);
691 return nextValue();
692 }
693
694 private JsonToken nextValue() throws IOException {
695 int c = nextNonWhitespace();
696 switch (c) {
697 case '{':
698 push(JsonScope.EMPTY_OBJECT);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700699 return token = JsonToken.BEGIN_OBJECT;
700
701 case '[':
702 push(JsonScope.EMPTY_ARRAY);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700703 return token = JsonToken.BEGIN_ARRAY;
704
705 case '\'':
Jesse Wilson1ba41712010-08-06 16:08:59 -0700706 checkLenient(); // fall-through
Jesse Wilson76d7e202010-08-03 17:55:09 -0700707 case '"':
708 value = nextString((char) c);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700709 return token = JsonToken.STRING;
710
711 default:
712 pos--;
713 return readLiteral();
714 }
715 }
716
717 /**
718 * Returns true once {@code limit - pos >= minimum}. If the data is
719 * exhausted before that many characters are available, this returns
720 * false.
721 */
722 private boolean fillBuffer(int minimum) throws IOException {
Jesse Wilsonfebae4e2011-07-18 12:58:03 -0700723 // Before clobbering the old characters, update where buffer starts
724 for (int i = 0; i < pos; i++) {
725 if (buffer[i] == '\n') {
726 bufferStartLine++;
727 bufferStartColumn = 1;
728 } else {
729 bufferStartColumn++;
730 }
731 }
732
Jesse Wilson76d7e202010-08-03 17:55:09 -0700733 if (limit != pos) {
734 limit -= pos;
735 System.arraycopy(buffer, pos, buffer, 0, limit);
736 } else {
737 limit = 0;
738 }
739
740 pos = 0;
741 int total;
742 while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
743 limit += total;
Jesse Wilson7a2c8132011-07-20 12:23:23 -0700744
745 // if this is the first read, consume an optional byte order mark (BOM) if it exists
Jesse Wilsond1ad3c22011-07-22 09:01:48 -0700746 if (bufferStartLine == 1 && bufferStartColumn == 1
747 && limit > 0 && buffer[0] == '\ufeff') {
Jesse Wilson7a2c8132011-07-20 12:23:23 -0700748 pos++;
749 bufferStartColumn--;
750 }
751
Jesse Wilson76d7e202010-08-03 17:55:09 -0700752 if (limit >= minimum) {
753 return true;
754 }
755 }
756 return false;
757 }
758
Jesse Wilsonfebae4e2011-07-18 12:58:03 -0700759 private int getLineNumber() {
760 int result = bufferStartLine;
761 for (int i = 0; i < pos; i++) {
762 if (buffer[i] == '\n') {
763 result++;
764 }
765 }
766 return result;
767 }
768
769 private int getColumnNumber() {
770 int result = bufferStartColumn;
771 for (int i = 0; i < pos; i++) {
772 if (buffer[i] == '\n') {
773 result = 1;
774 } else {
775 result++;
776 }
777 }
778 return result;
779 }
780
Jesse Wilson76d7e202010-08-03 17:55:09 -0700781 private int nextNonWhitespace() throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700782 while (pos < limit || fillBuffer(1)) {
783 int c = buffer[pos++];
784 switch (c) {
785 case '\t':
786 case ' ':
787 case '\n':
788 case '\r':
789 continue;
790
791 case '/':
792 if (pos == limit && !fillBuffer(1)) {
793 return c;
794 }
795
Jesse Wilson1ba41712010-08-06 16:08:59 -0700796 checkLenient();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700797 char peek = buffer[pos];
798 switch (peek) {
799 case '*':
800 // skip a /* c-style comment */
801 pos++;
802 if (!skipTo("*/")) {
803 throw syntaxError("Unterminated comment");
804 }
805 pos += 2;
806 continue;
807
808 case '/':
809 // skip a // end-of-line comment
810 pos++;
811 skipToEndOfLine();
812 continue;
813
814 default:
815 return c;
816 }
817
818 case '#':
819 /*
820 * Skip a # hash end-of-line comment. The JSON RFC doesn't
821 * specify this behaviour, but it's required to parse
822 * existing documents. See http://b/2571423.
823 */
Jesse Wilson1ba41712010-08-06 16:08:59 -0700824 checkLenient();
Jesse Wilson76d7e202010-08-03 17:55:09 -0700825 skipToEndOfLine();
826 continue;
827
828 default:
829 return c;
830 }
831 }
832
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -0800833 throw new EOFException("End of input");
Jesse Wilson76d7e202010-08-03 17:55:09 -0700834 }
835
Jesse Wilson1ba41712010-08-06 16:08:59 -0700836 private void checkLenient() throws IOException {
837 if (!lenient) {
838 throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
839 }
840 }
841
Jesse Wilson76d7e202010-08-03 17:55:09 -0700842 /**
843 * Advances the position until after the next newline character. If the line
844 * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
845 * caller.
846 */
847 private void skipToEndOfLine() throws IOException {
848 while (pos < limit || fillBuffer(1)) {
849 char c = buffer[pos++];
850 if (c == '\r' || c == '\n') {
851 break;
852 }
853 }
854 }
855
856 private boolean skipTo(String toFind) throws IOException {
857 outer:
Jesse Wilsond1ad3c22011-07-22 09:01:48 -0700858 for (; pos + toFind.length() <= limit || fillBuffer(toFind.length()); pos++) {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700859 for (int c = 0; c < toFind.length(); c++) {
860 if (buffer[pos + c] != toFind.charAt(c)) {
861 continue outer;
862 }
863 }
864 return true;
865 }
866 return false;
867 }
868
869 /**
870 * Returns the string up to but not including {@code quote}, unescaping any
871 * character escape sequences encountered along the way. The opening quote
872 * should have already been read. This consumes the closing quote, but does
873 * not include it in the returned string.
874 *
875 * @param quote either ' or ".
876 * @throws NumberFormatException if any unicode escape sequences are
877 * malformed.
878 */
879 private String nextString(char quote) throws IOException {
880 StringBuilder builder = null;
881 do {
882 /* the index of the first character not yet appended to the builder. */
883 int start = pos;
884 while (pos < limit) {
885 int c = buffer[pos++];
886
887 if (c == quote) {
Jesse Wilsond07fb882010-08-06 19:30:04 -0700888 if (skipping) {
889 return "skipped!";
890 } else if (builder == null) {
Jesse Wilson847cf342011-04-21 11:28:31 -0700891 return stringPool.get(buffer, start, pos - start - 1);
Jesse Wilson76d7e202010-08-03 17:55:09 -0700892 } else {
893 builder.append(buffer, start, pos - start - 1);
894 return builder.toString();
895 }
896
897 } else if (c == '\\') {
898 if (builder == null) {
899 builder = new StringBuilder();
900 }
901 builder.append(buffer, start, pos - start - 1);
902 builder.append(readEscapeCharacter());
903 start = pos;
904 }
905 }
906
907 if (builder == null) {
908 builder = new StringBuilder();
909 }
910 builder.append(buffer, start, pos - start);
911 } while (fillBuffer(1));
912
913 throw syntaxError("Unterminated string");
914 }
915
916 /**
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800917 * Reads the value up to but not including any delimiter characters. This
Jesse Wilson76d7e202010-08-03 17:55:09 -0700918 * does not consume the delimiter character.
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800919 *
920 * @param assignOffsetsOnly true for this method to only set the valuePos
921 * and valueLength fields and return a null result. This only works if
922 * the literal is short; a string is returned otherwise.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700923 */
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800924 private String nextLiteral(boolean assignOffsetsOnly) throws IOException {
Jesse Wilson76d7e202010-08-03 17:55:09 -0700925 StringBuilder builder = null;
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800926 valuePos = -1;
927 valueLength = 0;
928 int i = 0;
Jesse Wilson1ba41712010-08-06 16:08:59 -0700929
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800930 findNonLiteralCharacter:
931 while (true) {
932 for (; pos + i < limit; i++) {
933 switch (buffer[pos + i]) {
934 case '/':
935 case '\\':
936 case ';':
937 case '#':
938 case '=':
939 checkLenient(); // fall-through
940 case '{':
941 case '}':
942 case '[':
943 case ']':
944 case ':':
945 case ',':
946 case ' ':
947 case '\t':
948 case '\f':
949 case '\r':
950 case '\n':
951 break findNonLiteralCharacter;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700952 }
953 }
954
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800955 /*
956 * Attempt to load the entire literal into the buffer at once. If
957 * we run out of input, add a non-literal character at the end so
958 * that decoding doesn't need to do bounds checks.
959 */
960 if (i < buffer.length) {
961 if (fillBuffer(i + 1)) {
962 continue;
963 } else {
964 buffer[limit] = '\0';
965 break;
966 }
967 }
968
969 // use a StringBuilder when the value is too long. It must be an unquoted string.
Jesse Wilson76d7e202010-08-03 17:55:09 -0700970 if (builder == null) {
971 builder = new StringBuilder();
972 }
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800973 builder.append(buffer, pos, i);
974 valueLength += i;
975 pos += i;
976 i = 0;
977 if (!fillBuffer(1)) {
978 break;
979 }
980 }
Jesse Wilson76d7e202010-08-03 17:55:09 -0700981
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800982 String result;
983 if (assignOffsetsOnly && builder == null) {
984 valuePos = pos;
985 result = null;
986 } else if (skipping) {
987 result = "skipped!";
988 } else if (builder == null) {
Jesse Wilson847cf342011-04-21 11:28:31 -0700989 result = stringPool.get(buffer, pos, i);
Jesse Wilson9d9b4e72010-11-17 17:46:42 -0800990 } else {
991 builder.append(buffer, pos, i);
992 result = builder.toString();
993 }
994 valueLength += i;
995 pos += i;
996 return result;
Jesse Wilson76d7e202010-08-03 17:55:09 -0700997 }
998
999 @Override public String toString() {
1000 return getClass().getSimpleName() + " near " + getSnippet();
1001 }
1002
1003 /**
1004 * Unescapes the character identified by the character or characters that
1005 * immediately follow a backslash. The backslash '\' should have already
1006 * been read. This supports both unicode escapes "u000A" and two-character
1007 * escapes "\n".
1008 *
1009 * @throws NumberFormatException if any unicode escape sequences are
1010 * malformed.
1011 */
1012 private char readEscapeCharacter() throws IOException {
1013 if (pos == limit && !fillBuffer(1)) {
1014 throw syntaxError("Unterminated escape sequence");
1015 }
1016
1017 char escaped = buffer[pos++];
1018 switch (escaped) {
1019 case 'u':
1020 if (pos + 4 > limit && !fillBuffer(4)) {
1021 throw syntaxError("Unterminated escape sequence");
1022 }
Jesse Wilson847cf342011-04-21 11:28:31 -07001023 String hex = stringPool.get(buffer, pos, 4);
Jesse Wilson76d7e202010-08-03 17:55:09 -07001024 pos += 4;
1025 return (char) Integer.parseInt(hex, 16);
1026
1027 case 't':
1028 return '\t';
1029
1030 case 'b':
1031 return '\b';
1032
1033 case 'n':
1034 return '\n';
1035
1036 case 'r':
1037 return '\r';
1038
1039 case 'f':
1040 return '\f';
1041
1042 case '\'':
1043 case '"':
1044 case '\\':
1045 default:
1046 return escaped;
1047 }
1048 }
1049
1050 /**
1051 * Reads a null, boolean, numeric or unquoted string literal value.
1052 */
1053 private JsonToken readLiteral() throws IOException {
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001054 value = nextLiteral(true);
1055 if (valueLength == 0) {
Jesse Wilson76d7e202010-08-03 17:55:09 -07001056 throw syntaxError("Expected literal value");
1057 }
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001058 token = decodeLiteral();
1059 if (token == JsonToken.STRING) {
1060 checkLenient();
1061 }
1062 return token;
Jesse Wilson76d7e202010-08-03 17:55:09 -07001063 }
1064
1065 /**
1066 * Assigns {@code nextToken} based on the value of {@code nextValue}.
1067 */
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001068 private JsonToken decodeLiteral() throws IOException {
1069 if (valuePos == -1) {
1070 // it was too long to fit in the buffer so it can only be a string
1071 return JsonToken.STRING;
1072 } else if (valueLength == 4
1073 && ('n' == buffer[valuePos ] || 'N' == buffer[valuePos ])
1074 && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1])
1075 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1076 && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) {
1077 value = "null";
1078 return JsonToken.NULL;
1079 } else if (valueLength == 4
1080 && ('t' == buffer[valuePos ] || 'T' == buffer[valuePos ])
1081 && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1])
1082 && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2])
1083 && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) {
1084 value = TRUE;
1085 return JsonToken.BOOLEAN;
1086 } else if (valueLength == 5
1087 && ('f' == buffer[valuePos ] || 'F' == buffer[valuePos ])
1088 && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1])
1089 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1090 && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3])
1091 && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) {
1092 value = FALSE;
1093 return JsonToken.BOOLEAN;
Jesse Wilson76d7e202010-08-03 17:55:09 -07001094 } else {
Jesse Wilson847cf342011-04-21 11:28:31 -07001095 value = stringPool.get(buffer, valuePos, valueLength);
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001096 return decodeNumber(buffer, valuePos, valueLength);
1097 }
1098 }
1099
1100 /**
1101 * Determine whether the characters is a JSON number. Numbers are of the
1102 * form -12.34e+56. Fractional and exponential parts are optional. Leading
1103 * zeroes are not allowed in the value or exponential part, but are allowed
1104 * in the fraction.
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001105 */
1106 private JsonToken decodeNumber(char[] chars, int offset, int length) {
1107 int i = offset;
1108 int c = chars[i];
1109
1110 if (c == '-') {
1111 c = chars[++i];
1112 }
1113
1114 if (c == '0') {
1115 c = chars[++i];
1116 } else if (c >= '1' && c <= '9') {
1117 c = chars[++i];
1118 while (c >= '0' && c <= '9') {
1119 c = chars[++i];
Jesse Wilson76d7e202010-08-03 17:55:09 -07001120 }
Jesse Wilson9d9b4e72010-11-17 17:46:42 -08001121 } else {
1122 return JsonToken.STRING;
1123 }
1124
1125 if (c == '.') {
1126 c = chars[++i];
1127 while (c >= '0' && c <= '9') {
1128 c = chars[++i];
1129 }
1130 }
1131
1132 if (c == 'e' || c == 'E') {
1133 c = chars[++i];
1134 if (c == '+' || c == '-') {
1135 c = chars[++i];
1136 }
1137 if (c >= '0' && c <= '9') {
1138 c = chars[++i];
1139 while (c >= '0' && c <= '9') {
1140 c = chars[++i];
1141 }
1142 } else {
1143 return JsonToken.STRING;
1144 }
1145 }
1146
1147 if (i == offset + length) {
1148 return JsonToken.NUMBER;
1149 } else {
1150 return JsonToken.STRING;
Jesse Wilson76d7e202010-08-03 17:55:09 -07001151 }
1152 }
1153
1154 /**
1155 * Throws a new IO exception with the given message and a context snippet
1156 * with this reader's content.
1157 */
Jesse Wilsoneb97c0d2011-01-09 16:05:03 -08001158 private IOException syntaxError(String message) throws IOException {
Jesse Wilsonfebae4e2011-07-18 12:58:03 -07001159 throw new MalformedJsonException(message
1160 + " at line " + getLineNumber() + " column " + getColumnNumber());
Jesse Wilson76d7e202010-08-03 17:55:09 -07001161 }
1162
1163 private CharSequence getSnippet() {
1164 StringBuilder snippet = new StringBuilder();
1165 int beforePos = Math.min(pos, 20);
1166 snippet.append(buffer, pos - beforePos, beforePos);
1167 int afterPos = Math.min(limit - pos, 20);
1168 snippet.append(buffer, pos, afterPos);
1169 return snippet;
1170 }
Jesse Wilson76d7e202010-08-03 17:55:09 -07001171}