blob: 2482c07d3888b3b94f8614ec514568fed616c51f [file] [log] [blame]
Tatu Saloranta056bf5e2017-05-14 23:59:00 -07001package com.fasterxml.jackson.core.json.async;
2
3import java.io.*;
4import java.util.Arrays;
5
6import com.fasterxml.jackson.core.*;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -07007import com.fasterxml.jackson.core.base.ParserBase;
8import com.fasterxml.jackson.core.io.IOContext;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -07009import com.fasterxml.jackson.core.json.JsonReadContext;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070010import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
11
12/**
13 * Intermediate base class for non-blocking JSON parsers.
14 */
Tatu Saloranta81fb43c2017-05-17 18:25:34 -070015public abstract class NonBlockingJsonParserBase
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070016 extends ParserBase
17{
18 /*
19 /**********************************************************************
20 /* Major state constants
21 /**********************************************************************
22 */
23
24 /**
25 * State right after parser has been constructed, before seeing the first byte
26 * to know if there's header.
27 */
28 protected final static int MAJOR_INITIAL = 0;
29
30 /**
31 * State right after parser a root value has been
32 * finished, but next token has not yet been recognized.
33 */
34 protected final static int MAJOR_ROOT = 1;
35
36 protected final static int MAJOR_OBJECT_FIELD = 2;
37 protected final static int MAJOR_OBJECT_VALUE = 3;
38
39 protected final static int MAJOR_ARRAY_ELEMENT = 4;
40
41 /**
42 * State after non-blocking input source has indicated that no more input
43 * is forthcoming AND we have exhausted all the input
44 */
45 protected final static int MAJOR_CLOSED = 5;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -070046
47 /*
48 /**********************************************************************
49 /* Minor state constants
50 /**********************************************************************
51 */
52
53 /**
54 * State between root-level value, waiting for at least one white-space
55 * character as separator
56 */
57 protected final static int MINOR_FIELD_ROOT_NEED_SEPARATOR = 1;
58
59 /**
60 * State between root-level value, having processed at least one white-space
61 * character, and expecting either more, start of a value, or end of input
62 * stream.
63 */
64 protected final static int MINOR_FIELD_ROOT_GOT_SEPARATOR = 2;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070065
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -070066 protected final static int MINOR_FIELD_NAME = 10;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070067
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -070068 protected final static int MINOR_VALUE_NUMBER = 11;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070069
70 protected final static int MINOR_VALUE_STRING = 15;
71
72 protected final static int MINOR_VALUE_TOKEN_NULL = 15;
73 protected final static int MINOR_VALUE_TOKEN_TRUE = 15;
74 protected final static int MINOR_VALUE_TOKEN_FALSE = 15;
75
76 /*
77 /**********************************************************************
78 /* Helper objects, symbols (field names)
79 /**********************************************************************
80 */
81
82 /**
83 * Symbol table that contains field names encountered so far
84 */
85 final protected ByteQuadsCanonicalizer _symbols;
86
87 /**
88 * Temporary buffer used for name parsing.
89 */
90 protected int[] _quadBuffer = NO_INTS;
91
92 /**
93 * Quads used for hash calculation
94 */
95 protected int _quad1, _quad2;
96
97 /*
98 /**********************************************************************
99 /* Additional parsing state
100 /**********************************************************************
101 */
102
103 /**
104 * Current main decoding state
105 */
106 protected int _majorState;
107
108 /**
109 * Addition indicator within state; contextually relevant for just that state
110 */
111 protected int _minorState;
112
113 /**
114 * Value of {@link #_majorState} after completing a scalar value
115 */
116 protected int _majorStateAfterValue;
117
118 /**
119 * Flag that is sent when calling application indicates that there will
120 * be no more input to parse.
121 */
122 protected boolean _endOfInput = false;
123
124 /*
125 /**********************************************************************
126 /* Other buffering
127 /**********************************************************************
128 */
129
130 /**
131 * Temporary buffer for holding content if input not contiguous (but can
132 * fit in buffer)
133 */
134 protected byte[] _inputCopy;
135
136 /**
137 * Number of bytes buffered in <code>_inputCopy</code>
138 */
139 protected int _inputCopyLen;
140
141 /**
142 * Temporary storage for 32-bit values (int, float), as well as length markers
143 * for length-prefixed values.
144 */
145 protected int _pending32;
146
147 /**
148 * Temporary storage for 64-bit values (long, double), secondary storage
149 * for some other things (scale of BigDecimal values)
150 */
151 protected long _pending64;
152
153 /*
154 /**********************************************************************
155 /* Life-cycle
156 /**********************************************************************
157 */
158
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700159 public NonBlockingJsonParserBase(IOContext ctxt, int parserFeatures,
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700160 ByteQuadsCanonicalizer sym)
161 {
162 super(ctxt, parserFeatures);
163 _symbols = sym;
164 // We don't need a lot; for most things maximum known a-priori length below 70 bytes
165 _inputCopy = ctxt.allocReadIOBuffer(500);
166
167 _currToken = null;
168 _majorState = MAJOR_INITIAL;
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700169 _majorStateAfterValue = MAJOR_ROOT;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700170 }
171
172 @Override
173 public ObjectCodec getCodec() {
174 return null;
175 }
176
177 @Override
178 public void setCodec(ObjectCodec c) {
179 throw new UnsupportedOperationException("Can not use ObjectMapper with non-blocking parser");
180 }
181
182 /**
183 * @since 2.9
184 */
185 @Override
186 public boolean canParseAsync() { return true; }
187
188 /*
189 /**********************************************************
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700190 /* Test support
191 /**********************************************************
192 */
193
194 protected ByteQuadsCanonicalizer symbolTableForTests() {
195 return _symbols;
196 }
197
198 /*
199 /**********************************************************
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700200 /* Abstract methods from JsonParser
201 /**********************************************************
202 */
203
204 @Override
205 public abstract int releaseBuffered(OutputStream out) throws IOException;
206
207 @Override
208 public Object getInputSource() {
209 // since input is "pushed", to traditional source...
210 return null;
211 }
212
213 @Override
214 protected void _closeInput() throws IOException {
215 // nothing to do here
216 }
217
218 /*
219 /**********************************************************************
220 /* Overridden methods
221 /**********************************************************************
222 */
223
224 @Override
225 public boolean hasTextCharacters()
226 {
227 if (_currToken == JsonToken.VALUE_STRING) {
228 // yes; is or can be made available efficiently as char[]
229 return _textBuffer.hasTextAsCharacters();
230 }
231 if (_currToken == JsonToken.FIELD_NAME) {
232 // not necessarily; possible but:
233 return _nameCopied;
234 }
235 // other types, no benefit from accessing as char[]
236 return false;
237 }
238
239 /*
240 /**********************************************************************
241 /* Public API, access to token information, text
242 /**********************************************************************
243 */
244
245 /**
246 * Method for accessing textual representation of the current event;
247 * if no current event (before first call to {@link #nextToken}, or
248 * after encountering end-of-input), returns null.
249 * Method can be called for any event.
250 */
251 @Override
252 public String getText() throws IOException
253 {
254 if (_currToken == JsonToken.VALUE_STRING) {
255 return _textBuffer.contentsAsString();
256 }
257 JsonToken t = _currToken;
258 if (t == null || _currToken == JsonToken.NOT_AVAILABLE) { // null only before/after document
259 return null;
260 }
261 if (t == JsonToken.FIELD_NAME) {
262 return _parsingContext.getCurrentName();
263 }
264 if (t.isNumeric()) {
265 // TODO: optimize?
266 return getNumberValue().toString();
267 }
268 return _currToken.asString();
269 }
270
271 @Override
272 public char[] getTextCharacters() throws IOException
273 {
274 switch (currentTokenId()) {
275 case JsonTokenId.ID_STRING:
276 return _textBuffer.getTextBuffer();
277 case JsonTokenId.ID_FIELD_NAME:
278 if (!_nameCopied) {
279 String name = _parsingContext.getCurrentName();
280 int nameLen = name.length();
281 if (_nameCopyBuffer == null) {
282 _nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen);
283 } else if (_nameCopyBuffer.length < nameLen) {
284 _nameCopyBuffer = new char[nameLen];
285 }
286 name.getChars(0, nameLen, _nameCopyBuffer, 0);
287 _nameCopied = true;
288 }
289 return _nameCopyBuffer;
290 case JsonTokenId.ID_NUMBER_INT:
291 case JsonTokenId.ID_NUMBER_FLOAT:
292 return getNumberValue().toString().toCharArray();
293 case JsonTokenId.ID_NO_TOKEN:
294 case JsonTokenId.ID_NOT_AVAILABLE:
295 return null;
296 default:
297 return _currToken.asCharArray();
298 }
299 }
300
301 @Override
302 public int getTextLength() throws IOException
303 {
304 switch (currentTokenId()) {
305 case JsonTokenId.ID_STRING:
306 return _textBuffer.size();
307 case JsonTokenId.ID_FIELD_NAME:
308 return _parsingContext.getCurrentName().length();
309 case JsonTokenId.ID_NUMBER_INT:
310 case JsonTokenId.ID_NUMBER_FLOAT:
311 return getNumberValue().toString().length();
312 case JsonTokenId.ID_NO_TOKEN:
313 case JsonTokenId.ID_NOT_AVAILABLE:
314 return 0; // or throw exception?
315 default:
316 return _currToken.asCharArray().length;
317 }
318 }
319
320 @Override
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700321 public int getTextOffset() throws IOException {
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700322 return 0;
323 }
324
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700325 @Override
326 public int getText(Writer w) throws IOException
327 {
328 if (_currToken == JsonToken.VALUE_STRING) {
329 return _textBuffer.contentsToWriter(w);
330 }
331 if (_currToken == JsonToken.NOT_AVAILABLE) {
332 _reportError("Current token not available: can not call this method");
333 }
334 // otherwise default handling works fine
335 return super.getText(w);
336 }
337
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700338 /*
339 /**********************************************************************
340 /* Public API, access to token information, binary
341 /**********************************************************************
342 */
343
344 @Override
345 public byte[] getBinaryValue(Base64Variant b64variant) throws IOException
346 {
347 if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
348 _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken);
349 }
350 return _binaryValue;
351 }
352
353 @Override
354 public Object getEmbeddedObject() throws IOException
355 {
356 if (_currToken == JsonToken.VALUE_EMBEDDED_OBJECT ) {
357 return _binaryValue;
358 }
359 return null;
360 }
361
362 @Override
363 public int readBinaryValue(Base64Variant b64variant, OutputStream out)
364 throws IOException {
365 if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
366 _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken);
367 }
368 out.write(_binaryValue);
369 return _binaryValue.length;
370 }
371
372 /*
373 /**********************************************************************
Tatu Saloranta8e8ed3e2017-05-19 17:07:48 -0700374 /* Handling of nested scope, state
375 /**********************************************************************
376 */
377
378 protected final JsonToken _startArrayScope() throws IOException
379 {
380 _parsingContext = _parsingContext.createChildArrayContext(-1, -1);
381 _majorState = MAJOR_ARRAY_ELEMENT;
382 _majorStateAfterValue = MAJOR_ARRAY_ELEMENT;
383 return (_currToken = JsonToken.START_ARRAY);
384 }
385
386 protected final JsonToken _startObjectScope() throws IOException
387 {
388 _parsingContext = _parsingContext.createChildObjectContext(-1, -1);
389 _majorState = MAJOR_OBJECT_FIELD;
390 _majorStateAfterValue = MAJOR_OBJECT_FIELD;
391 return (_currToken = JsonToken.START_OBJECT);
392 }
393
394 protected final JsonToken _closeArrayScope() throws IOException
395 {
396 if (!_parsingContext.inArray()) {
397 _reportMismatchedEndMarker(']', '}');
398 }
399 JsonReadContext ctxt = _parsingContext.getParent();
400 _parsingContext = ctxt;
401 int st;
402 if (ctxt.inObject()) {
403 st = MAJOR_OBJECT_FIELD;
404 } else if (ctxt.inArray()) {
405 st = MAJOR_ARRAY_ELEMENT;
406 } else {
407 st = MAJOR_ROOT;
408 }
409 _majorState = st;
410 _majorStateAfterValue = st;
411 return (_currToken = JsonToken.END_ARRAY);
412 }
413
414 protected final JsonToken _closeObjectScope() throws IOException
415 {
416 if (!_parsingContext.inObject()) {
417 _reportMismatchedEndMarker('}', ']');
418 }
419 JsonReadContext ctxt = _parsingContext.getParent();
420 _parsingContext = ctxt;
421 int st;
422 if (ctxt.inObject()) {
423 st = MAJOR_OBJECT_FIELD;
424 } else if (ctxt.inArray()) {
425 st = MAJOR_ARRAY_ELEMENT;
426 } else {
427 st = MAJOR_ROOT;
428 }
429 _majorState = st;
430 _majorStateAfterValue = st;
431 return (_currToken = JsonToken.END_OBJECT);
432 }
433
434 /*
435 /**********************************************************************
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700436 /* Internal methods, field name parsing
437 /**********************************************************************
438 */
439
440 // Helper method for trying to find specified encoded UTF-8 byte sequence
441 // from symbol table; if successful avoids actual decoding to String
442 protected final String _findDecodedFromSymbols(byte[] inBuf, int inPtr, int len) throws IOException
443 {
444 // First: maybe we already have this name decoded?
445 if (len < 5) {
446 int q = inBuf[inPtr] & 0xFF;
447 if (--len > 0) {
448 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
449 if (--len > 0) {
450 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
451 if (--len > 0) {
452 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
453 }
454 }
455 }
456 _quad1 = q;
457 return _symbols.findName(q);
458 }
459 if (len < 9) {
460 // First quadbyte is easy
461 int q1 = (inBuf[inPtr] & 0xFF) << 8;
462 q1 += (inBuf[++inPtr] & 0xFF);
463 q1 <<= 8;
464 q1 += (inBuf[++inPtr] & 0xFF);
465 q1 <<= 8;
466 q1 += (inBuf[++inPtr] & 0xFF);
467 int q2 = (inBuf[++inPtr] & 0xFF);
468 len -= 5;
469 if (len > 0) {
470 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
471 if (--len > 0) {
472 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
473 if (--len > 0) {
474 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
475 }
476 }
477 }
478 _quad1 = q1;
479 _quad2 = q2;
480 return _symbols.findName(q1, q2);
481 }
482 return _findDecodedLonger(inBuf, inPtr, len);
483 }
484
485 // Method for locating names longer than 8 bytes (in UTF-8)
486 private final String _findDecodedLonger(byte[] inBuf, int inPtr, int len) throws IOException
487 {
488 // first, need enough buffer to store bytes as ints:
489 {
490 int bufLen = (len + 3) >> 2;
491 if (bufLen > _quadBuffer.length) {
492 _quadBuffer = Arrays.copyOf(_quadBuffer, bufLen+4);
493 }
494 }
495 // then decode, full quads first
496 int offset = 0;
497 do {
498 int q = (inBuf[inPtr++] & 0xFF) << 8;
499 q |= inBuf[inPtr++] & 0xFF;
500 q <<= 8;
501 q |= inBuf[inPtr++] & 0xFF;
502 q <<= 8;
503 q |= inBuf[inPtr++] & 0xFF;
504 _quadBuffer[offset++] = q;
505 } while ((len -= 4) > 3);
506 // and then leftovers
507 if (len > 0) {
508 int q = inBuf[inPtr] & 0xFF;
509 if (--len > 0) {
510 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
511 if (--len > 0) {
512 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
513 }
514 }
515 _quadBuffer[offset++] = q;
516 }
517 return _symbols.findName(_quadBuffer, offset);
518 }
519
520 protected final String _addDecodedToSymbols(int len, String name)
521 {
522 if (len < 5) {
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700523 return _symbols.addName(name, _quad1);
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700524 }
525 if (len < 9) {
526 return _symbols.addName(name, _quad1, _quad2);
527 }
528 int qlen = (len + 3) >> 2;
529 return _symbols.addName(name, _quadBuffer, qlen);
530 }
531
532 /*
533 /**********************************************************************
534 /* Internal methods, state changes
535 /**********************************************************************
536 */
537
538 /**
539 * Helper method called at point when all input has been exhausted and
540 * input feeder has indicated no more input will be forthcoming.
541 */
542 protected final JsonToken _eofAsNextToken() throws IOException {
543 _majorState = MAJOR_CLOSED;
544 if (!_parsingContext.inRoot()) {
545 _handleEOF();
546 }
547 close();
548 return (_currToken = null);
549 }
550
551 protected final JsonToken _valueComplete(JsonToken t) throws IOException
552 {
553 _majorState = _majorStateAfterValue;
554 _currToken = t;
555 return t;
556 }
557
558 /*
559 /**********************************************************************
560 /* Internal methods, error reporting
561 /**********************************************************************
562 */
563
564 protected void _reportInvalidInitial(int mask) throws JsonParseException {
565 _reportError("Invalid UTF-8 start byte 0x"+Integer.toHexString(mask));
566 }
567
568 protected void _reportInvalidOther(int mask, int ptr) throws JsonParseException {
569 _inputPtr = ptr;
570 _reportError("Invalid UTF-8 middle byte 0x"+Integer.toHexString(mask));
571 }
572}