blob: 385e0fedcafcaccf45b7feb33ecdbcb68e5d8add [file] [log] [blame]
Tatu Saloranta056bf5e2017-05-14 23:59:00 -07001package com.fasterxml.jackson.core.json.async;
2
3import java.io.*;
4import java.util.Arrays;
5
6import com.fasterxml.jackson.core.*;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -07007import com.fasterxml.jackson.core.base.ParserBase;
8import com.fasterxml.jackson.core.io.IOContext;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -07009import com.fasterxml.jackson.core.json.JsonReadContext;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070010import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
11
12/**
13 * Intermediate base class for non-blocking JSON parsers.
14 */
Tatu Saloranta81fb43c2017-05-17 18:25:34 -070015public abstract class NonBlockingJsonParserBase
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070016 extends ParserBase
17{
18 /*
19 /**********************************************************************
20 /* Major state constants
21 /**********************************************************************
22 */
23
24 /**
25 * State right after parser has been constructed, before seeing the first byte
26 * to know if there's header.
27 */
28 protected final static int MAJOR_INITIAL = 0;
29
30 /**
31 * State right after parser a root value has been
32 * finished, but next token has not yet been recognized.
33 */
34 protected final static int MAJOR_ROOT = 1;
35
36 protected final static int MAJOR_OBJECT_FIELD = 2;
37 protected final static int MAJOR_OBJECT_VALUE = 3;
38
39 protected final static int MAJOR_ARRAY_ELEMENT = 4;
40
41 /**
42 * State after non-blocking input source has indicated that no more input
43 * is forthcoming AND we have exhausted all the input
44 */
45 protected final static int MAJOR_CLOSED = 5;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -070046
47 /*
48 /**********************************************************************
49 /* Minor state constants
50 /**********************************************************************
51 */
52
53 /**
54 * State between root-level value, waiting for at least one white-space
55 * character as separator
56 */
57 protected final static int MINOR_FIELD_ROOT_NEED_SEPARATOR = 1;
58
59 /**
60 * State between root-level value, having processed at least one white-space
61 * character, and expecting either more, start of a value, or end of input
62 * stream.
63 */
64 protected final static int MINOR_FIELD_ROOT_GOT_SEPARATOR = 2;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070065
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -070066 protected final static int MINOR_FIELD_NAME = 10;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070067
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -070068 protected final static int MINOR_VALUE_NUMBER = 11;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070069
70 protected final static int MINOR_VALUE_STRING = 15;
71
Tatu Saloranta955e5c22017-05-23 17:02:30 -070072 protected final static int MINOR_VALUE_TOKEN_NULL = 16;
73 protected final static int MINOR_VALUE_TOKEN_TRUE = 17;
74 protected final static int MINOR_VALUE_TOKEN_FALSE = 18;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070075
Tatu Saloranta955e5c22017-05-23 17:02:30 -070076 /**
77 * Special state at which point decoding of a non-quoted token has encountered
78 * a problem; that is, either not matching fully (like "truf" instead of "true",
79 * at "tru"), or not having trailing separator (or end of input), like "trueful".
80 * Attempt is made, then, to decode likely full input token to report suitable
81 * error.
82 */
83 protected final static int MINOR_VALUE_TOKEN_ERROR = 19;
84
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070085 /*
86 /**********************************************************************
87 /* Helper objects, symbols (field names)
88 /**********************************************************************
89 */
90
91 /**
92 * Symbol table that contains field names encountered so far
93 */
94 final protected ByteQuadsCanonicalizer _symbols;
95
96 /**
97 * Temporary buffer used for name parsing.
98 */
99 protected int[] _quadBuffer = NO_INTS;
100
101 /**
102 * Quads used for hash calculation
103 */
104 protected int _quad1, _quad2;
105
106 /*
107 /**********************************************************************
108 /* Additional parsing state
109 /**********************************************************************
110 */
111
112 /**
113 * Current main decoding state
114 */
115 protected int _majorState;
116
117 /**
118 * Addition indicator within state; contextually relevant for just that state
119 */
120 protected int _minorState;
121
122 /**
123 * Value of {@link #_majorState} after completing a scalar value
124 */
125 protected int _majorStateAfterValue;
126
127 /**
128 * Flag that is sent when calling application indicates that there will
129 * be no more input to parse.
130 */
131 protected boolean _endOfInput = false;
132
133 /*
134 /**********************************************************************
135 /* Other buffering
136 /**********************************************************************
137 */
138
139 /**
140 * Temporary buffer for holding content if input not contiguous (but can
141 * fit in buffer)
142 */
143 protected byte[] _inputCopy;
144
145 /**
146 * Number of bytes buffered in <code>_inputCopy</code>
147 */
148 protected int _inputCopyLen;
149
150 /**
151 * Temporary storage for 32-bit values (int, float), as well as length markers
152 * for length-prefixed values.
153 */
154 protected int _pending32;
155
156 /**
157 * Temporary storage for 64-bit values (long, double), secondary storage
158 * for some other things (scale of BigDecimal values)
159 */
160 protected long _pending64;
161
162 /*
163 /**********************************************************************
164 /* Life-cycle
165 /**********************************************************************
166 */
167
/**
 * Initializes the shared non-blocking parser state.
 *
 * @param ctxt I/O context, passed to the superclass and used to allocate the input copy buffer
 * @param parserFeatures feature flags, passed through to the superclass
 * @param sym symbol table to use for field names
 */
public NonBlockingJsonParserBase(IOContext ctxt, int parserFeatures,
        ByteQuadsCanonicalizer sym)
{
    super(ctxt, parserFeatures);

    // No token yet; parser starts in the initial state and a completed
    // scalar value leads to the root state
    _currToken = null;
    _majorState = MAJOR_INITIAL;
    _majorStateAfterValue = MAJOR_ROOT;

    _symbols = sym;
    // We don't need a lot; for most things maximum known a-priori length below 70 bytes
    _inputCopy = ctxt.allocReadIOBuffer(500);
}
180
181 @Override
182 public ObjectCodec getCodec() {
183 return null;
184 }
185
186 @Override
187 public void setCodec(ObjectCodec c) {
188 throw new UnsupportedOperationException("Can not use ObjectMapper with non-blocking parser");
189 }
190
191 /**
192 * @since 2.9
193 */
194 @Override
195 public boolean canParseAsync() { return true; }
196
197 /*
198 /**********************************************************
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700199 /* Test support
200 /**********************************************************
201 */
202
203 protected ByteQuadsCanonicalizer symbolTableForTests() {
204 return _symbols;
205 }
206
207 /*
208 /**********************************************************
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700209 /* Abstract methods from JsonParser
210 /**********************************************************
211 */
212
213 @Override
214 public abstract int releaseBuffered(OutputStream out) throws IOException;
215
216 @Override
217 public Object getInputSource() {
218 // since input is "pushed", to traditional source...
219 return null;
220 }
221
222 @Override
223 protected void _closeInput() throws IOException {
224 // nothing to do here
225 }
226
227 /*
228 /**********************************************************************
229 /* Overridden methods
230 /**********************************************************************
231 */
232
233 @Override
234 public boolean hasTextCharacters()
235 {
236 if (_currToken == JsonToken.VALUE_STRING) {
237 // yes; is or can be made available efficiently as char[]
238 return _textBuffer.hasTextAsCharacters();
239 }
240 if (_currToken == JsonToken.FIELD_NAME) {
241 // not necessarily; possible but:
242 return _nameCopied;
243 }
244 // other types, no benefit from accessing as char[]
245 return false;
246 }
247
248 /*
249 /**********************************************************************
250 /* Public API, access to token information, text
251 /**********************************************************************
252 */
253
254 /**
255 * Method for accessing textual representation of the current event;
256 * if no current event (before first call to {@link #nextToken}, or
257 * after encountering end-of-input), returns null.
258 * Method can be called for any event.
259 */
260 @Override
261 public String getText() throws IOException
262 {
263 if (_currToken == JsonToken.VALUE_STRING) {
264 return _textBuffer.contentsAsString();
265 }
266 JsonToken t = _currToken;
267 if (t == null || _currToken == JsonToken.NOT_AVAILABLE) { // null only before/after document
268 return null;
269 }
270 if (t == JsonToken.FIELD_NAME) {
271 return _parsingContext.getCurrentName();
272 }
273 if (t.isNumeric()) {
274 // TODO: optimize?
275 return getNumberValue().toString();
276 }
277 return _currToken.asString();
278 }
279
280 @Override
281 public char[] getTextCharacters() throws IOException
282 {
283 switch (currentTokenId()) {
284 case JsonTokenId.ID_STRING:
285 return _textBuffer.getTextBuffer();
286 case JsonTokenId.ID_FIELD_NAME:
287 if (!_nameCopied) {
288 String name = _parsingContext.getCurrentName();
289 int nameLen = name.length();
290 if (_nameCopyBuffer == null) {
291 _nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen);
292 } else if (_nameCopyBuffer.length < nameLen) {
293 _nameCopyBuffer = new char[nameLen];
294 }
295 name.getChars(0, nameLen, _nameCopyBuffer, 0);
296 _nameCopied = true;
297 }
298 return _nameCopyBuffer;
299 case JsonTokenId.ID_NUMBER_INT:
300 case JsonTokenId.ID_NUMBER_FLOAT:
301 return getNumberValue().toString().toCharArray();
302 case JsonTokenId.ID_NO_TOKEN:
303 case JsonTokenId.ID_NOT_AVAILABLE:
304 return null;
305 default:
306 return _currToken.asCharArray();
307 }
308 }
309
310 @Override
311 public int getTextLength() throws IOException
312 {
313 switch (currentTokenId()) {
314 case JsonTokenId.ID_STRING:
315 return _textBuffer.size();
316 case JsonTokenId.ID_FIELD_NAME:
317 return _parsingContext.getCurrentName().length();
318 case JsonTokenId.ID_NUMBER_INT:
319 case JsonTokenId.ID_NUMBER_FLOAT:
320 return getNumberValue().toString().length();
321 case JsonTokenId.ID_NO_TOKEN:
322 case JsonTokenId.ID_NOT_AVAILABLE:
323 return 0; // or throw exception?
324 default:
325 return _currToken.asCharArray().length;
326 }
327 }
328
329 @Override
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700330 public int getTextOffset() throws IOException {
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700331 return 0;
332 }
333
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700334 @Override
335 public int getText(Writer w) throws IOException
336 {
337 if (_currToken == JsonToken.VALUE_STRING) {
338 return _textBuffer.contentsToWriter(w);
339 }
340 if (_currToken == JsonToken.NOT_AVAILABLE) {
341 _reportError("Current token not available: can not call this method");
342 }
343 // otherwise default handling works fine
344 return super.getText(w);
345 }
346
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700347 /*
348 /**********************************************************************
349 /* Public API, access to token information, binary
350 /**********************************************************************
351 */
352
353 @Override
354 public byte[] getBinaryValue(Base64Variant b64variant) throws IOException
355 {
356 if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
357 _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken);
358 }
359 return _binaryValue;
360 }
361
362 @Override
363 public Object getEmbeddedObject() throws IOException
364 {
365 if (_currToken == JsonToken.VALUE_EMBEDDED_OBJECT ) {
366 return _binaryValue;
367 }
368 return null;
369 }
370
371 @Override
372 public int readBinaryValue(Base64Variant b64variant, OutputStream out)
373 throws IOException {
374 if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
375 _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken);
376 }
377 out.write(_binaryValue);
378 return _binaryValue.length;
379 }
380
381 /*
382 /**********************************************************************
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700383 /* Handling of nested scope, state
384 /**********************************************************************
385 */
386
387 protected final JsonToken _startArrayScope() throws IOException
388 {
389 _parsingContext = _parsingContext.createChildArrayContext(-1, -1);
390 _majorState = MAJOR_ARRAY_ELEMENT;
391 _majorStateAfterValue = MAJOR_ARRAY_ELEMENT;
392 return (_currToken = JsonToken.START_ARRAY);
393 }
394
395 protected final JsonToken _startObjectScope() throws IOException
396 {
397 _parsingContext = _parsingContext.createChildObjectContext(-1, -1);
398 _majorState = MAJOR_OBJECT_FIELD;
399 _majorStateAfterValue = MAJOR_OBJECT_FIELD;
400 return (_currToken = JsonToken.START_OBJECT);
401 }
402
403 protected final JsonToken _closeArrayScope() throws IOException
404 {
405 if (!_parsingContext.inArray()) {
406 _reportMismatchedEndMarker(']', '}');
407 }
408 JsonReadContext ctxt = _parsingContext.getParent();
409 _parsingContext = ctxt;
410 int st;
411 if (ctxt.inObject()) {
412 st = MAJOR_OBJECT_FIELD;
413 } else if (ctxt.inArray()) {
414 st = MAJOR_ARRAY_ELEMENT;
415 } else {
416 st = MAJOR_ROOT;
417 }
418 _majorState = st;
419 _majorStateAfterValue = st;
420 return (_currToken = JsonToken.END_ARRAY);
421 }
422
423 protected final JsonToken _closeObjectScope() throws IOException
424 {
425 if (!_parsingContext.inObject()) {
426 _reportMismatchedEndMarker('}', ']');
427 }
428 JsonReadContext ctxt = _parsingContext.getParent();
429 _parsingContext = ctxt;
430 int st;
431 if (ctxt.inObject()) {
432 st = MAJOR_OBJECT_FIELD;
433 } else if (ctxt.inArray()) {
434 st = MAJOR_ARRAY_ELEMENT;
435 } else {
436 st = MAJOR_ROOT;
437 }
438 _majorState = st;
439 _majorStateAfterValue = st;
440 return (_currToken = JsonToken.END_OBJECT);
441 }
442
443 /*
444 /**********************************************************************
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700445 /* Internal methods, field name parsing
446 /**********************************************************************
447 */
448
449 // Helper method for trying to find specified encoded UTF-8 byte sequence
450 // from symbol table; if successful avoids actual decoding to String
451 protected final String _findDecodedFromSymbols(byte[] inBuf, int inPtr, int len) throws IOException
452 {
453 // First: maybe we already have this name decoded?
454 if (len < 5) {
455 int q = inBuf[inPtr] & 0xFF;
456 if (--len > 0) {
457 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
458 if (--len > 0) {
459 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
460 if (--len > 0) {
461 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
462 }
463 }
464 }
465 _quad1 = q;
466 return _symbols.findName(q);
467 }
468 if (len < 9) {
469 // First quadbyte is easy
470 int q1 = (inBuf[inPtr] & 0xFF) << 8;
471 q1 += (inBuf[++inPtr] & 0xFF);
472 q1 <<= 8;
473 q1 += (inBuf[++inPtr] & 0xFF);
474 q1 <<= 8;
475 q1 += (inBuf[++inPtr] & 0xFF);
476 int q2 = (inBuf[++inPtr] & 0xFF);
477 len -= 5;
478 if (len > 0) {
479 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
480 if (--len > 0) {
481 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
482 if (--len > 0) {
483 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
484 }
485 }
486 }
487 _quad1 = q1;
488 _quad2 = q2;
489 return _symbols.findName(q1, q2);
490 }
491 return _findDecodedLonger(inBuf, inPtr, len);
492 }
493
494 // Method for locating names longer than 8 bytes (in UTF-8)
495 private final String _findDecodedLonger(byte[] inBuf, int inPtr, int len) throws IOException
496 {
497 // first, need enough buffer to store bytes as ints:
498 {
499 int bufLen = (len + 3) >> 2;
500 if (bufLen > _quadBuffer.length) {
501 _quadBuffer = Arrays.copyOf(_quadBuffer, bufLen+4);
502 }
503 }
504 // then decode, full quads first
505 int offset = 0;
506 do {
507 int q = (inBuf[inPtr++] & 0xFF) << 8;
508 q |= inBuf[inPtr++] & 0xFF;
509 q <<= 8;
510 q |= inBuf[inPtr++] & 0xFF;
511 q <<= 8;
512 q |= inBuf[inPtr++] & 0xFF;
513 _quadBuffer[offset++] = q;
514 } while ((len -= 4) > 3);
515 // and then leftovers
516 if (len > 0) {
517 int q = inBuf[inPtr] & 0xFF;
518 if (--len > 0) {
519 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
520 if (--len > 0) {
521 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
522 }
523 }
524 _quadBuffer[offset++] = q;
525 }
526 return _symbols.findName(_quadBuffer, offset);
527 }
528
529 protected final String _addDecodedToSymbols(int len, String name)
530 {
531 if (len < 5) {
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700532 return _symbols.addName(name, _quad1);
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700533 }
534 if (len < 9) {
535 return _symbols.addName(name, _quad1, _quad2);
536 }
537 int qlen = (len + 3) >> 2;
538 return _symbols.addName(name, _quadBuffer, qlen);
539 }
540
541 /*
542 /**********************************************************************
543 /* Internal methods, state changes
544 /**********************************************************************
545 */
546
547 /**
548 * Helper method called at point when all input has been exhausted and
549 * input feeder has indicated no more input will be forthcoming.
550 */
551 protected final JsonToken _eofAsNextToken() throws IOException {
552 _majorState = MAJOR_CLOSED;
553 if (!_parsingContext.inRoot()) {
554 _handleEOF();
555 }
556 close();
557 return (_currToken = null);
558 }
559
560 protected final JsonToken _valueComplete(JsonToken t) throws IOException
561 {
562 _majorState = _majorStateAfterValue;
563 _currToken = t;
564 return t;
565 }
566
567 /*
568 /**********************************************************************
569 /* Internal methods, error reporting
570 /**********************************************************************
571 */
572
573 protected void _reportInvalidInitial(int mask) throws JsonParseException {
574 _reportError("Invalid UTF-8 start byte 0x"+Integer.toHexString(mask));
575 }
576
577 protected void _reportInvalidOther(int mask, int ptr) throws JsonParseException {
578 _inputPtr = ptr;
579 _reportError("Invalid UTF-8 middle byte 0x"+Integer.toHexString(mask));
580 }
581}