blob: 6fb067b2f0be9c68b90dd6275f0087cf10d4fffc [file] [log] [blame]
Tatu Saloranta81fb43c2017-05-17 18:25:34 -07001package com.fasterxml.jackson.core.json.async;
2
3import java.io.IOException;
4import java.io.OutputStream;
5
6import com.fasterxml.jackson.core.JsonToken;
7import com.fasterxml.jackson.core.async.ByteArrayFeeder;
8import com.fasterxml.jackson.core.async.NonBlockingInputFeeder;
9import com.fasterxml.jackson.core.io.IOContext;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -070010import com.fasterxml.jackson.core.json.ByteSourceJsonBootstrapper;
Tatu Saloranta81fb43c2017-05-17 18:25:34 -070011import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
12import com.fasterxml.jackson.core.util.VersionUtil;
13
14public class NonBlockingJsonParser
15 extends NonBlockingJsonParserBase
16 implements ByteArrayFeeder
17{
18 /*
19 /**********************************************************************
20 /* Input source config
21 /**********************************************************************
22 */
23
24 /**
25 * This buffer is actually provided via {@link NonBlockingInputFeeder}
26 */
27 protected byte[] _inputBuffer = NO_BYTES;
28
29 /**
30 * In addition to current buffer pointer, and end pointer,
31 * we will also need to know number of bytes originally
32 * contained. This is needed to correctly update location
33 * information when the block has been completed.
34 */
35 protected int _origBufferLen;
36
37 // And from ParserBase:
38// protected int _inputPtr;
39// protected int _inputEnd;
40
41 /*
42 /**********************************************************************
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -070043 /* Location tracking, additional
44 /**********************************************************************
45 */
46
47 /**
48 * Alternate row tracker, used to keep track of position by `\r` marker
49 * (whereas <code>_currInputRow</code> tracks `\n`). Used to simplify
50 * tracking of linefeeds, assuming that input typically uses various
51 * linefeed combinations (`\r`, `\n` or `\r\n`) consistently, in which
52 * case we can simply choose max of two row candidates.
53 */
54 protected int _currInputRowAlt = 1;
55
56 /*
57 /**********************************************************************
58 /* Other state
59 /**********************************************************************
60 */
61
62 protected int _currentQuote;
63
64 /*
65 /**********************************************************************
Tatu Saloranta81fb43c2017-05-17 18:25:34 -070066 /* Life-cycle
67 /**********************************************************************
68 */
69
/**
 * Creates a non-blocking parser; all three arguments are handed to the
 * superclass constructor unchanged. No input is available until
 * {@link #feedInput(byte[], int, int)} is called.
 *
 * @param ctxt I/O context passed through to the base class
 * @param parserFeatures parser feature bit set passed through to the base class
 * @param sym symbol table passed through to the base class
 */
public NonBlockingJsonParser(IOContext ctxt,
        int parserFeatures,
        ByteQuadsCanonicalizer sym)
{
    super(ctxt, parserFeatures, sym);
}
75
76 /*
77 /**********************************************************************
78 /* AsyncInputFeeder impl
79 /**********************************************************************
80 */
81
82 @Override
83 public ByteArrayFeeder getNonBlockingInputFeeder() {
84 return this;
85 }
86
87 @Override
88 public final boolean needMoreInput() {
89 return (_inputPtr >=_inputEnd) && !_endOfInput;
90 }
91
92 @Override
93 public void feedInput(byte[] buf, int start, int end) throws IOException
94 {
95 // Must not have remaining input
96 if (_inputPtr < _inputEnd) {
97 _reportError("Still have %d undecoded bytes, should not call 'feedInput'", _inputEnd - _inputPtr);
98 }
99 if (end < start) {
100 _reportError("Input end (%d) may not be before start (%d)", end, start);
101 }
102 // and shouldn't have been marked as end-of-input
103 if (_endOfInput) {
104 _reportError("Already closed, can not feed more input");
105 }
106 // Time to update pointers first
107 _currInputProcessed += _origBufferLen;
108
109 // And then update buffer settings
110 _inputBuffer = buf;
111 _inputPtr = start;
112 _inputEnd = end;
113 _origBufferLen = end - start;
114 }
115
116 @Override
117 public void endOfInput() {
118 _endOfInput = true;
119 }
120
121 /*
122 /**********************************************************************
123 /* Abstract methods/overrides from JsonParser
124 /**********************************************************************
125 */
126
127 /* Implementing these methods efficiently for non-blocking cases would
128 * be complicated; so for now let's just use the default non-optimized
129 * implementation
130 */
131
132// public boolean nextFieldName(SerializableString str) throws IOException
133// public String nextTextValue() throws IOException
134// public int nextIntValue(int defaultValue) throws IOException
135// public long nextLongValue(long defaultValue) throws IOException
136// public Boolean nextBooleanValue() throws IOException
137
138 @Override
139 public int releaseBuffered(OutputStream out) throws IOException {
140 int avail = _inputEnd - _inputPtr;
141 if (avail > 0) {
142 out.write(_inputBuffer, _inputPtr, avail);
143 }
144 return avail;
145 }
146
147 /*
148 /**********************************************************************
149 /* Main-level decoding
150 /**********************************************************************
151 */
152
153 @Override
154 public JsonToken nextToken() throws IOException
155 {
156 // First: regardless of where we really are, need at least one more byte;
157 // can simplify some of the checks by short-circuiting right away
158 if (_inputPtr >= _inputEnd) {
159 if (_closed) {
160 return null;
161 }
162 // note: if so, do not even bother changing state
163 if (_endOfInput) { // except for this special case
164 return _eofAsNextToken();
165 }
166 return JsonToken.NOT_AVAILABLE;
167 }
168 // in the middle of tokenization?
169 if (_currToken == JsonToken.NOT_AVAILABLE) {
170 return _finishToken();
171 }
172
173 // No: fresh new token; may or may not have existing one
174 _numTypesValid = NR_UNKNOWN;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700175 _tokenInputTotal = _currInputProcessed + _inputPtr;
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700176 // also: clear any data retained so far
177 _binaryValue = null;
178 int ch = _inputBuffer[_inputPtr++];
179
180 switch (_majorState) {
181 case MAJOR_INITIAL:
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700182 return _startDocument(ch);
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700183
184 case MAJOR_ROOT:
185 return _startValue(ch);
186
187 case MAJOR_OBJECT_FIELD: // field or end-object
188 // expect name
189 return _startFieldName(ch);
190
191 case MAJOR_OBJECT_VALUE:
192 case MAJOR_ARRAY_ELEMENT: // element or end-array
193 return _startValue(ch);
194
195 default:
196 }
197 VersionUtil.throwInternal();
198 return null;
199 }
200
201 /**
202 * Method called when a (scalar) value type has been detected, but not all of
203 * contents have been decoded due to incomplete input available.
204 */
205 protected final JsonToken _finishToken() throws IOException
206 {
207 // NOTE: caller ensures availability of at least one byte
208
209 switch (_minorState) {
Tatu Saloranta955e5c22017-05-23 17:02:30 -0700210 case MINOR_VALUE_TOKEN_NULL:
211 return _finishKeywordToken("null", _pending32, JsonToken.VALUE_NULL);
212 case MINOR_VALUE_TOKEN_TRUE:
213 return _finishKeywordToken("null", _pending32, JsonToken.VALUE_TRUE);
214 case MINOR_VALUE_TOKEN_FALSE:
215 return _finishKeywordToken("null", _pending32, JsonToken.VALUE_FALSE);
216 case MINOR_VALUE_TOKEN_ERROR: // case of "almost token", just need tokenize for error
217 return _finishErrorToken();
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700218 }
219 return null;
220 }
221
222 /*
223 /**********************************************************************
224 /* Second-level decoding, root level
225 /**********************************************************************
226 */
227
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700228 private final JsonToken _startDocument(int ch) throws IOException
229 {
230 ch &= 0xFF;
231
232 // Very first byte: could be BOM
233 if (ch == ByteSourceJsonBootstrapper.UTF8_BOM_1) {
234 // !!! TODO
235 }
236
237 // If not BOM (or we got past it), could be whitespace or comment to skip
238 while (ch <= 0x020) {
239 if (ch != INT_SPACE) {
240 if (ch == INT_LF) {
241 ++_currInputRow;
242 _currInputRowStart = _inputPtr;
243 } else if (ch == INT_CR) {
244 ++_currInputRowAlt;
245 _currInputRowStart = _inputPtr;
246 } else if (ch != INT_TAB) {
247 _throwInvalidSpace(ch);
248 }
249 }
250 if (_inputPtr >= _inputEnd) {
251 _minorState = MINOR_FIELD_ROOT_GOT_SEPARATOR;
252 if (_closed) {
253 return null;
254 }
255 // note: if so, do not even bother changing state
256 if (_endOfInput) { // except for this special case
257 return _eofAsNextToken();
258 }
259 return JsonToken.NOT_AVAILABLE;
260 }
261 ch = _inputBuffer[_inputPtr++] & 0xFF;
262 }
263 return _startValue(ch);
264 }
265
266 /*
267 /**********************************************************************
268 /* Second-level decoding, value parsing
269 /**********************************************************************
270 */
271
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700272 /**
273 * Helper method called to detect type of a value token (at any level), and possibly
274 * decode it if contained in input buffer.
275 * Note that possible header has been ruled out by caller and is not checked here.
276 */
277 private final JsonToken _startValue(int ch) throws IOException
278 {
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700279 if (ch == INT_QUOTE) {
280 return _startString(ch);
281 }
282 switch (ch) {
283 case '-':
284 return _startNegativeNumber();
285
286 // Should we have separate handling for plus? Although
287 // it is not allowed per se, it may be erroneously used,
288 // and could be indicate by a more specific error message.
289 case '0':
290 case '1':
291 case '2':
292 case '3':
293 case '4':
294 case '5':
295 case '6':
296 case '7':
297 case '8':
298 case '9':
299 return _startPositiveNumber(ch);
300 case 'f':
301 return _startFalseToken();
302 case 'n':
303 return _startNullToken();
304 case 't':
305 return _startTrueToken();
306 case '[':
307 return _startArrayScope();
308 case ']':
309 return _closeArrayScope();
310 case '{':
311 return _startObjectScope();
312 case '}':
313 return _closeObjectScope();
314 default:
315 }
316 return _startUnexpectedValue(ch);
317 }
318
319 protected JsonToken _startUnexpectedValue(int ch) throws IOException
320 {
321 // TODO: Maybe support non-standard tokens that streaming parser does:
322 //
323 // * NaN
324 // * Infinity
325 // * Plus-prefix for numbers
326 // * Apostrophe for Strings
327
328 switch (ch) {
329 case '\'':
330 return _startString(ch);
331
332 case ',':
333 // If Feature.ALLOW_MISSING_VALUES is enabled we may allow "missing values",
334 // that is, encountering a trailing comma or closing marker where value would be expected
335 if (!_parsingContext.inObject() && isEnabled(Feature.ALLOW_MISSING_VALUES)) {
336 // Important to "push back" separator, to be consumed before next value;
337 // does not lead to infinite loop
338 --_inputPtr;
339 return _valueComplete(JsonToken.VALUE_NULL);
340 }
341 break;
342 }
343 // !!! TODO: maybe try to collect more information for better diagnostics
344 _reportUnexpectedChar(ch, "expected a valid value (number, String, array, object, 'true', 'false' or 'null')");
345 return null;
346 }
347
348 /*
349 /**********************************************************************
350 /* Second-level decoding, simple tokens
351 /**********************************************************************
352 */
353
354 protected JsonToken _startFalseToken() throws IOException
355 {
Tatu Saloranta955e5c22017-05-23 17:02:30 -0700356 int ptr = _inputPtr;
357 if ((ptr + 4) < _inputEnd) { // yes, can determine efficiently
358 byte[] buf = _inputBuffer;
359 if ((buf[ptr++] == 'a')
360 && (buf[ptr++] == 'l')
361 && (buf[ptr++] == 's')
362 && (buf[ptr++] == 'e')) {
363 int ch = buf[ptr] & 0xFF;
364 if (ch < INT_0 || (ch == INT_RBRACKET) || (ch == INT_RCURLY)) { // expected/allowed chars
365 _inputPtr = ptr;
366 return _valueComplete(JsonToken.VALUE_FALSE);
367 }
368 }
369 }
370 _minorState = MINOR_VALUE_TOKEN_FALSE;
371 return _finishKeywordToken("false", 1, JsonToken.VALUE_FALSE);
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700372 }
373
374 protected JsonToken _startTrueToken() throws IOException
375 {
Tatu Saloranta955e5c22017-05-23 17:02:30 -0700376 int ptr = _inputPtr;
377 if ((ptr + 3) < _inputEnd) { // yes, can determine efficiently
378 byte[] buf = _inputBuffer;
379 if ((buf[ptr++] == 'r')
380 && (buf[ptr++] == 'u')
381 && (buf[ptr++] == 'e')) {
382 int ch = buf[ptr] & 0xFF;
383 if (ch < INT_0 || (ch == INT_RBRACKET) || (ch == INT_RCURLY)) { // expected/allowed chars
384 _inputPtr = ptr;
385 return _valueComplete(JsonToken.VALUE_TRUE);
386 }
387 }
388 }
389 _minorState = MINOR_VALUE_TOKEN_TRUE;
390 return _finishKeywordToken("true", 1, JsonToken.VALUE_TRUE);
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700391 }
392
393 protected JsonToken _startNullToken() throws IOException
394 {
Tatu Saloranta955e5c22017-05-23 17:02:30 -0700395 int ptr = _inputPtr;
396 if ((ptr + 3) < _inputEnd) { // yes, can determine efficiently
397 byte[] buf = _inputBuffer;
398 if ((buf[ptr++] == 'u')
399 && (buf[ptr++] == 'l')
400 && (buf[ptr++] == 'l')) {
401 int ch = buf[ptr] & 0xFF;
402 if (ch < INT_0 || (ch == INT_RBRACKET) || (ch == INT_RCURLY)) { // expected/allowed chars
403 _inputPtr = ptr;
404 return _valueComplete(JsonToken.VALUE_NULL);
405 }
406 }
407 }
408 _minorState = MINOR_VALUE_TOKEN_TRUE;
409 return _finishKeywordToken("null", 1, JsonToken.VALUE_NULL);
410 }
411
412 protected JsonToken _finishKeywordToken(String expToken, int matched,
413 JsonToken result) throws IOException
414 {
415 final int end = expToken.length();
416
417 while (true) {
418 if (_inputPtr >= _inputEnd) {
419 _pending32 = matched;
420 return (_currToken = JsonToken.NOT_AVAILABLE);
421 }
422 int ch = _inputBuffer[_inputPtr] & 0xFF;
423 if (matched == end) { // need to verify trailing separator
424 if (ch < INT_0 || (ch == INT_RBRACKET) || (ch == INT_RCURLY)) { // expected/allowed chars
425 return _valueComplete(JsonToken.VALUE_NULL);
426 }
427 }
428 if (ch != expToken.charAt(matched)) {
429 break;
430 }
431 ++_inputPtr;
432 }
433 _minorState = MINOR_VALUE_TOKEN_ERROR;
434 _textBuffer.resetWithString(expToken.substring(0, matched));
435 return _finishErrorToken();
436 }
437
438 protected JsonToken _finishErrorToken() throws IOException
439 {
440 while (_inputPtr < _inputEnd) {
441 int i = (int) _inputBuffer[_inputPtr++];
442
443// !!! TODO: Decode UTF-8 characters properly...
444// char c = (char) _decodeCharForError(i);
445
446 char ch = (char) i;
447 if (Character.isJavaIdentifierPart(ch)) {
448 // 11-Jan-2016, tatu: note: we will fully consume the character,
449 // included or not, so if recovery was possible, it'd be off-by-one...
450 _textBuffer.append(ch);
451 if (_textBuffer.size() < MAX_ERROR_TOKEN_LENGTH) {
452 continue;
453 }
454 }
455 _reportError("Unrecognized token '%s': was expecting %s", _textBuffer.contentsAsString(),
456 "'null', 'true' or 'false'");
457 }
458 return (_currToken = JsonToken.NOT_AVAILABLE);
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700459 }
460
461 /*
462 /**********************************************************************
463 /* Second-level decoding, String decoding
464 /**********************************************************************
465 */
466
467 protected JsonToken _startString(int q) throws IOException
468 {
469 _currentQuote = q;
470 return null;
471 }
472
473 /*
474 /**********************************************************************
475 /* Second-level decoding, String decoding
476 /**********************************************************************
477 */
478
479 protected JsonToken _startPositiveNumber(int ch) throws IOException
480 {
481 return null;
482 }
483
484 protected JsonToken _startNegativeNumber() throws IOException
485 {
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700486 return null;
487 }
488
489 /*
490 /**********************************************************************
491 /* Second-level decoding, Name decoding
492 /**********************************************************************
493 */
494
495 /**
496 * Method that handles initial token type recognition for token
497 * that has to be either FIELD_NAME or END_OBJECT.
498 */
499 protected final JsonToken _startFieldName(int ch) throws IOException
500 {
501 return null;
502 }
503}