blob: 2df7b5a66247926a58a776f2eb1ced6f2598efef [file] [log] [blame]
Tatu Saloranta056bf5e2017-05-14 23:59:00 -07001package com.fasterxml.jackson.core.json.async;
2
3import java.io.*;
4import java.util.Arrays;
5
6import com.fasterxml.jackson.core.*;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -07007import com.fasterxml.jackson.core.base.ParserBase;
8import com.fasterxml.jackson.core.io.IOContext;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -07009import com.fasterxml.jackson.core.json.JsonReadContext;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070010import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
11
12/**
13 * Intermediate base class for non-blocking JSON parsers.
14 */
Tatu Saloranta81fb43c2017-05-17 18:25:34 -070015public abstract class NonBlockingJsonParserBase
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070016 extends ParserBase
17{
18 /*
19 /**********************************************************************
20 /* Major state constants
21 /**********************************************************************
22 */
23
24 /**
25 * State right after parser has been constructed, before seeing the first byte
26 * to know if there's header.
27 */
28 protected final static int MAJOR_INITIAL = 0;
29
30 /**
31 * State right after parser a root value has been
32 * finished, but next token has not yet been recognized.
33 */
34 protected final static int MAJOR_ROOT = 1;
35
Tatu Saloranta0728d422017-05-23 23:11:36 -070036 protected final static int MAJOR_OBJECT_FIELD_FIRST = 2;
37 protected final static int MAJOR_OBJECT_FIELD_NEXT = 3;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070038
Tatu Saloranta0728d422017-05-23 23:11:36 -070039 protected final static int MAJOR_OBJECT_VALUE = 4;
40
41 protected final static int MAJOR_ARRAY_ELEMENT_FIRST = 5;
42 protected final static int MAJOR_ARRAY_ELEMENT_NEXT = 6;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070043
44 /**
45 * State after non-blocking input source has indicated that no more input
46 * is forthcoming AND we have exhausted all the input
47 */
Tatu Saloranta0728d422017-05-23 23:11:36 -070048 protected final static int MAJOR_CLOSED = 7;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -070049
50 /*
51 /**********************************************************************
52 /* Minor state constants
53 /**********************************************************************
54 */
55
56 /**
57 * State between root-level value, waiting for at least one white-space
58 * character as separator
59 */
Tatu Saloranta0728d422017-05-23 23:11:36 -070060 protected final static int MINOR_ROOT_NEED_SEPARATOR = 1;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -070061
62 /**
63 * State between root-level value, having processed at least one white-space
64 * character, and expecting either more, start of a value, or end of input
65 * stream.
66 */
Tatu Saloranta0728d422017-05-23 23:11:36 -070067 protected final static int MINOR_ROOT_GOT_SEPARATOR = 2;
68
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -070069 protected final static int MINOR_FIELD_NAME = 10;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070070
Tatu Saloranta0728d422017-05-23 23:11:36 -070071 protected final static int MINOR_VALUE_LEADING_WS = 15;
72 protected final static int MINOR_VALUE_LEADING_COMMA = 16;
73 protected final static int MINOR_VALUE_LEADING_COLON = 17;
74
75 protected final static int MINOR_VALUE_NUMBER = 20;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070076
Tatu Saloranta0728d422017-05-23 23:11:36 -070077 protected final static int MINOR_VALUE_STRING = 25;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070078
Tatu Saloranta0728d422017-05-23 23:11:36 -070079 protected final static int MINOR_VALUE_TOKEN_NULL = 30;
80 protected final static int MINOR_VALUE_TOKEN_TRUE = 31;
81 protected final static int MINOR_VALUE_TOKEN_FALSE = 32;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070082
Tatu Saloranta955e5c22017-05-23 17:02:30 -070083 /**
84 * Special state at which point decoding of a non-quoted token has encountered
85 * a problem; that is, either not matching fully (like "truf" instead of "true",
86 * at "tru"), or not having trailing separator (or end of input), like "trueful".
87 * Attempt is made, then, to decode likely full input token to report suitable
88 * error.
89 */
90 protected final static int MINOR_VALUE_TOKEN_ERROR = 19;
91
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070092 /*
93 /**********************************************************************
94 /* Helper objects, symbols (field names)
95 /**********************************************************************
96 */
97
98 /**
99 * Symbol table that contains field names encountered so far
100 */
101 final protected ByteQuadsCanonicalizer _symbols;
102
103 /**
104 * Temporary buffer used for name parsing.
105 */
106 protected int[] _quadBuffer = NO_INTS;
107
108 /**
109 * Quads used for hash calculation
110 */
111 protected int _quad1, _quad2;
112
113 /*
114 /**********************************************************************
115 /* Additional parsing state
116 /**********************************************************************
117 */
118
119 /**
120 * Current main decoding state
121 */
122 protected int _majorState;
123
124 /**
125 * Addition indicator within state; contextually relevant for just that state
126 */
127 protected int _minorState;
128
129 /**
130 * Value of {@link #_majorState} after completing a scalar value
131 */
132 protected int _majorStateAfterValue;
133
134 /**
135 * Flag that is sent when calling application indicates that there will
136 * be no more input to parse.
137 */
138 protected boolean _endOfInput = false;
139
140 /*
141 /**********************************************************************
142 /* Other buffering
143 /**********************************************************************
144 */
145
146 /**
147 * Temporary buffer for holding content if input not contiguous (but can
148 * fit in buffer)
149 */
150 protected byte[] _inputCopy;
151
152 /**
153 * Number of bytes buffered in <code>_inputCopy</code>
154 */
155 protected int _inputCopyLen;
156
157 /**
158 * Temporary storage for 32-bit values (int, float), as well as length markers
159 * for length-prefixed values.
160 */
161 protected int _pending32;
162
163 /**
164 * Temporary storage for 64-bit values (long, double), secondary storage
165 * for some other things (scale of BigDecimal values)
166 */
167 protected long _pending64;
168
169 /*
170 /**********************************************************************
171 /* Life-cycle
172 /**********************************************************************
173 */
174
/**
 * Sets up the shared state of a non-blocking parser: no current token,
 * major state {@code MAJOR_INITIAL}, and {@code MAJOR_ROOT} as the state
 * to enter after the first scalar value completes.
 *
 * @param ctxt I/O context handed to the superclass; also used to allocate the input copy buffer
 * @param parserFeatures parser feature flags handed to the superclass
 * @param sym symbol table to use for field names
 */
public NonBlockingJsonParserBase(IOContext ctxt, int parserFeatures,
        ByteQuadsCanonicalizer sym)
{
    super(ctxt, parserFeatures);

    // Parsing starts before any token has been seen
    _currToken = null;
    _majorState = MAJOR_INITIAL;
    _majorStateAfterValue = MAJOR_ROOT;

    _symbols = sym;
    // A small buffer is enough: for most things the maximum length
    // known a-priori is below 70 bytes
    _inputCopy = ctxt.allocReadIOBuffer(500);
}
187
188 @Override
189 public ObjectCodec getCodec() {
190 return null;
191 }
192
193 @Override
194 public void setCodec(ObjectCodec c) {
195 throw new UnsupportedOperationException("Can not use ObjectMapper with non-blocking parser");
196 }
197
198 /**
199 * @since 2.9
200 */
201 @Override
202 public boolean canParseAsync() { return true; }
203
204 /*
205 /**********************************************************
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700206 /* Test support
207 /**********************************************************
208 */
209
210 protected ByteQuadsCanonicalizer symbolTableForTests() {
211 return _symbols;
212 }
213
214 /*
215 /**********************************************************
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700216 /* Abstract methods from JsonParser
217 /**********************************************************
218 */
219
220 @Override
221 public abstract int releaseBuffered(OutputStream out) throws IOException;
222
223 @Override
224 public Object getInputSource() {
225 // since input is "pushed", to traditional source...
226 return null;
227 }
228
229 @Override
230 protected void _closeInput() throws IOException {
231 // nothing to do here
232 }
233
234 /*
235 /**********************************************************************
236 /* Overridden methods
237 /**********************************************************************
238 */
239
240 @Override
241 public boolean hasTextCharacters()
242 {
243 if (_currToken == JsonToken.VALUE_STRING) {
244 // yes; is or can be made available efficiently as char[]
245 return _textBuffer.hasTextAsCharacters();
246 }
247 if (_currToken == JsonToken.FIELD_NAME) {
248 // not necessarily; possible but:
249 return _nameCopied;
250 }
251 // other types, no benefit from accessing as char[]
252 return false;
253 }
254
255 /*
256 /**********************************************************************
257 /* Public API, access to token information, text
258 /**********************************************************************
259 */
260
261 /**
262 * Method for accessing textual representation of the current event;
263 * if no current event (before first call to {@link #nextToken}, or
264 * after encountering end-of-input), returns null.
265 * Method can be called for any event.
266 */
267 @Override
268 public String getText() throws IOException
269 {
270 if (_currToken == JsonToken.VALUE_STRING) {
271 return _textBuffer.contentsAsString();
272 }
273 JsonToken t = _currToken;
274 if (t == null || _currToken == JsonToken.NOT_AVAILABLE) { // null only before/after document
275 return null;
276 }
277 if (t == JsonToken.FIELD_NAME) {
278 return _parsingContext.getCurrentName();
279 }
280 if (t.isNumeric()) {
281 // TODO: optimize?
282 return getNumberValue().toString();
283 }
284 return _currToken.asString();
285 }
286
287 @Override
288 public char[] getTextCharacters() throws IOException
289 {
290 switch (currentTokenId()) {
291 case JsonTokenId.ID_STRING:
292 return _textBuffer.getTextBuffer();
293 case JsonTokenId.ID_FIELD_NAME:
294 if (!_nameCopied) {
295 String name = _parsingContext.getCurrentName();
296 int nameLen = name.length();
297 if (_nameCopyBuffer == null) {
298 _nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen);
299 } else if (_nameCopyBuffer.length < nameLen) {
300 _nameCopyBuffer = new char[nameLen];
301 }
302 name.getChars(0, nameLen, _nameCopyBuffer, 0);
303 _nameCopied = true;
304 }
305 return _nameCopyBuffer;
306 case JsonTokenId.ID_NUMBER_INT:
307 case JsonTokenId.ID_NUMBER_FLOAT:
308 return getNumberValue().toString().toCharArray();
309 case JsonTokenId.ID_NO_TOKEN:
310 case JsonTokenId.ID_NOT_AVAILABLE:
311 return null;
312 default:
313 return _currToken.asCharArray();
314 }
315 }
316
317 @Override
318 public int getTextLength() throws IOException
319 {
320 switch (currentTokenId()) {
321 case JsonTokenId.ID_STRING:
322 return _textBuffer.size();
323 case JsonTokenId.ID_FIELD_NAME:
324 return _parsingContext.getCurrentName().length();
325 case JsonTokenId.ID_NUMBER_INT:
326 case JsonTokenId.ID_NUMBER_FLOAT:
327 return getNumberValue().toString().length();
328 case JsonTokenId.ID_NO_TOKEN:
329 case JsonTokenId.ID_NOT_AVAILABLE:
330 return 0; // or throw exception?
331 default:
332 return _currToken.asCharArray().length;
333 }
334 }
335
336 @Override
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700337 public int getTextOffset() throws IOException {
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700338 return 0;
339 }
340
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700341 @Override
342 public int getText(Writer w) throws IOException
343 {
344 if (_currToken == JsonToken.VALUE_STRING) {
345 return _textBuffer.contentsToWriter(w);
346 }
347 if (_currToken == JsonToken.NOT_AVAILABLE) {
348 _reportError("Current token not available: can not call this method");
349 }
350 // otherwise default handling works fine
351 return super.getText(w);
352 }
353
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700354 /*
355 /**********************************************************************
356 /* Public API, access to token information, binary
357 /**********************************************************************
358 */
359
360 @Override
361 public byte[] getBinaryValue(Base64Variant b64variant) throws IOException
362 {
363 if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
364 _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken);
365 }
366 return _binaryValue;
367 }
368
369 @Override
370 public Object getEmbeddedObject() throws IOException
371 {
372 if (_currToken == JsonToken.VALUE_EMBEDDED_OBJECT ) {
373 return _binaryValue;
374 }
375 return null;
376 }
377
378 @Override
379 public int readBinaryValue(Base64Variant b64variant, OutputStream out)
380 throws IOException {
381 if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
382 _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken);
383 }
384 out.write(_binaryValue);
385 return _binaryValue.length;
386 }
387
388 /*
389 /**********************************************************************
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700390 /* Handling of nested scope, state
391 /**********************************************************************
392 */
393
394 protected final JsonToken _startArrayScope() throws IOException
395 {
396 _parsingContext = _parsingContext.createChildArrayContext(-1, -1);
Tatu Saloranta0728d422017-05-23 23:11:36 -0700397 _majorState = MAJOR_ARRAY_ELEMENT_FIRST;
398 _majorStateAfterValue = MAJOR_ARRAY_ELEMENT_NEXT;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700399 return (_currToken = JsonToken.START_ARRAY);
400 }
401
402 protected final JsonToken _startObjectScope() throws IOException
403 {
404 _parsingContext = _parsingContext.createChildObjectContext(-1, -1);
Tatu Saloranta0728d422017-05-23 23:11:36 -0700405 _majorState = MAJOR_OBJECT_FIELD_FIRST;
406 _majorStateAfterValue = MAJOR_OBJECT_FIELD_NEXT;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700407 return (_currToken = JsonToken.START_OBJECT);
408 }
409
410 protected final JsonToken _closeArrayScope() throws IOException
411 {
412 if (!_parsingContext.inArray()) {
413 _reportMismatchedEndMarker(']', '}');
414 }
415 JsonReadContext ctxt = _parsingContext.getParent();
416 _parsingContext = ctxt;
417 int st;
418 if (ctxt.inObject()) {
Tatu Saloranta0728d422017-05-23 23:11:36 -0700419 st = MAJOR_OBJECT_FIELD_NEXT;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700420 } else if (ctxt.inArray()) {
Tatu Saloranta0728d422017-05-23 23:11:36 -0700421 st = MAJOR_ARRAY_ELEMENT_NEXT;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700422 } else {
423 st = MAJOR_ROOT;
424 }
425 _majorState = st;
426 _majorStateAfterValue = st;
427 return (_currToken = JsonToken.END_ARRAY);
428 }
429
430 protected final JsonToken _closeObjectScope() throws IOException
431 {
432 if (!_parsingContext.inObject()) {
433 _reportMismatchedEndMarker('}', ']');
434 }
435 JsonReadContext ctxt = _parsingContext.getParent();
436 _parsingContext = ctxt;
437 int st;
438 if (ctxt.inObject()) {
Tatu Saloranta0728d422017-05-23 23:11:36 -0700439 st = MAJOR_OBJECT_FIELD_NEXT;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700440 } else if (ctxt.inArray()) {
Tatu Saloranta0728d422017-05-23 23:11:36 -0700441 st = MAJOR_ARRAY_ELEMENT_NEXT;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700442 } else {
443 st = MAJOR_ROOT;
444 }
445 _majorState = st;
446 _majorStateAfterValue = st;
447 return (_currToken = JsonToken.END_OBJECT);
448 }
449
450 /*
451 /**********************************************************************
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700452 /* Internal methods, field name parsing
453 /**********************************************************************
454 */
455
456 // Helper method for trying to find specified encoded UTF-8 byte sequence
457 // from symbol table; if successful avoids actual decoding to String
458 protected final String _findDecodedFromSymbols(byte[] inBuf, int inPtr, int len) throws IOException
459 {
460 // First: maybe we already have this name decoded?
461 if (len < 5) {
462 int q = inBuf[inPtr] & 0xFF;
463 if (--len > 0) {
464 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
465 if (--len > 0) {
466 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
467 if (--len > 0) {
468 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
469 }
470 }
471 }
472 _quad1 = q;
473 return _symbols.findName(q);
474 }
475 if (len < 9) {
476 // First quadbyte is easy
477 int q1 = (inBuf[inPtr] & 0xFF) << 8;
478 q1 += (inBuf[++inPtr] & 0xFF);
479 q1 <<= 8;
480 q1 += (inBuf[++inPtr] & 0xFF);
481 q1 <<= 8;
482 q1 += (inBuf[++inPtr] & 0xFF);
483 int q2 = (inBuf[++inPtr] & 0xFF);
484 len -= 5;
485 if (len > 0) {
486 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
487 if (--len > 0) {
488 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
489 if (--len > 0) {
490 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
491 }
492 }
493 }
494 _quad1 = q1;
495 _quad2 = q2;
496 return _symbols.findName(q1, q2);
497 }
498 return _findDecodedLonger(inBuf, inPtr, len);
499 }
500
501 // Method for locating names longer than 8 bytes (in UTF-8)
502 private final String _findDecodedLonger(byte[] inBuf, int inPtr, int len) throws IOException
503 {
504 // first, need enough buffer to store bytes as ints:
505 {
506 int bufLen = (len + 3) >> 2;
507 if (bufLen > _quadBuffer.length) {
508 _quadBuffer = Arrays.copyOf(_quadBuffer, bufLen+4);
509 }
510 }
511 // then decode, full quads first
512 int offset = 0;
513 do {
514 int q = (inBuf[inPtr++] & 0xFF) << 8;
515 q |= inBuf[inPtr++] & 0xFF;
516 q <<= 8;
517 q |= inBuf[inPtr++] & 0xFF;
518 q <<= 8;
519 q |= inBuf[inPtr++] & 0xFF;
520 _quadBuffer[offset++] = q;
521 } while ((len -= 4) > 3);
522 // and then leftovers
523 if (len > 0) {
524 int q = inBuf[inPtr] & 0xFF;
525 if (--len > 0) {
526 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
527 if (--len > 0) {
528 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
529 }
530 }
531 _quadBuffer[offset++] = q;
532 }
533 return _symbols.findName(_quadBuffer, offset);
534 }
535
536 protected final String _addDecodedToSymbols(int len, String name)
537 {
538 if (len < 5) {
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700539 return _symbols.addName(name, _quad1);
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700540 }
541 if (len < 9) {
542 return _symbols.addName(name, _quad1, _quad2);
543 }
544 int qlen = (len + 3) >> 2;
545 return _symbols.addName(name, _quadBuffer, qlen);
546 }
547
548 /*
549 /**********************************************************************
550 /* Internal methods, state changes
551 /**********************************************************************
552 */
553
554 /**
555 * Helper method called at point when all input has been exhausted and
556 * input feeder has indicated no more input will be forthcoming.
557 */
558 protected final JsonToken _eofAsNextToken() throws IOException {
559 _majorState = MAJOR_CLOSED;
560 if (!_parsingContext.inRoot()) {
561 _handleEOF();
562 }
563 close();
564 return (_currToken = null);
565 }
566
567 protected final JsonToken _valueComplete(JsonToken t) throws IOException
568 {
569 _majorState = _majorStateAfterValue;
570 _currToken = t;
571 return t;
572 }
573
574 /*
575 /**********************************************************************
576 /* Internal methods, error reporting
577 /**********************************************************************
578 */
579
580 protected void _reportInvalidInitial(int mask) throws JsonParseException {
581 _reportError("Invalid UTF-8 start byte 0x"+Integer.toHexString(mask));
582 }
583
584 protected void _reportInvalidOther(int mask, int ptr) throws JsonParseException {
585 _inputPtr = ptr;
586 _reportError("Invalid UTF-8 middle byte 0x"+Integer.toHexString(mask));
587 }
588}