blob: 4955417c2d2049b00da93040c3d7eb744aae4682 [file] [log] [blame]
Tatu Saloranta056bf5e2017-05-14 23:59:00 -07001package com.fasterxml.jackson.core.json.async;
2
3import java.io.*;
4import java.util.Arrays;
5
6import com.fasterxml.jackson.core.*;
Tatu Saloranta056bf5e2017-05-14 23:59:00 -07007import com.fasterxml.jackson.core.base.ParserBase;
8import com.fasterxml.jackson.core.io.IOContext;
9import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
10
/**
 * Intermediate base class for non-blocking JSON parsers.
 */
Tatu Saloranta81fb43c2017-05-17 18:25:34 -070014public abstract class NonBlockingJsonParserBase
Tatu Saloranta056bf5e2017-05-14 23:59:00 -070015 extends ParserBase
16{
17 /*
18 /**********************************************************************
19 /* Major state constants
20 /**********************************************************************
21 */
22
23 /**
24 * State right after parser has been constructed, before seeing the first byte
25 * to know if there's header.
26 */
27 protected final static int MAJOR_INITIAL = 0;
28
29 /**
30 * State right after parser a root value has been
31 * finished, but next token has not yet been recognized.
32 */
33 protected final static int MAJOR_ROOT = 1;
34
35 protected final static int MAJOR_OBJECT_FIELD = 2;
36 protected final static int MAJOR_OBJECT_VALUE = 3;
37
38 protected final static int MAJOR_ARRAY_ELEMENT = 4;
39
40 /**
41 * State after non-blocking input source has indicated that no more input
42 * is forthcoming AND we have exhausted all the input
43 */
44 protected final static int MAJOR_CLOSED = 5;
45
46 // // // "Sub-states"
47
48 protected final static int MINOR_FIELD_NAME = 1;
49
50 protected final static int MINOR_VALUE_NUMBER = 6;
51
52 protected final static int MINOR_VALUE_STRING = 15;
53
54 protected final static int MINOR_VALUE_TOKEN_NULL = 15;
55 protected final static int MINOR_VALUE_TOKEN_TRUE = 15;
56 protected final static int MINOR_VALUE_TOKEN_FALSE = 15;
57
/*
/**********************************************************************
/* Helper objects, symbols (field names)
/**********************************************************************
 */

/**
 * Symbol table that contains field names encountered so far
 */
final protected ByteQuadsCanonicalizer _symbols;

/**
 * Temporary buffer used for name parsing: holds the name bytes packed
 * as 32-bit "quads" for names longer than 8 bytes. Starts out as
 * {@code NO_INTS} (presumably an inherited empty array -- confirm) and is
 * grown on demand by {@code _findDecodedLonger}.
 */
protected int[] _quadBuffer = NO_INTS;

/**
 * Quads used for hash calculation: first (and for names of 5 to 8 bytes,
 * second) packed quad of the most recently looked-up short name.
 */
protected int _quad1, _quad2;
78
79 /*
80 /**********************************************************************
81 /* Additional parsing state
82 /**********************************************************************
83 */
84
85 /**
86 * Current main decoding state
87 */
88 protected int _majorState;
89
90 /**
91 * Addition indicator within state; contextually relevant for just that state
92 */
93 protected int _minorState;
94
95 /**
96 * Value of {@link #_majorState} after completing a scalar value
97 */
98 protected int _majorStateAfterValue;
99
100 /**
101 * Flag that is sent when calling application indicates that there will
102 * be no more input to parse.
103 */
104 protected boolean _endOfInput = false;
105
106 /*
107 /**********************************************************************
108 /* Other buffering
109 /**********************************************************************
110 */
111
112 /**
113 * Temporary buffer for holding content if input not contiguous (but can
114 * fit in buffer)
115 */
116 protected byte[] _inputCopy;
117
118 /**
119 * Number of bytes buffered in <code>_inputCopy</code>
120 */
121 protected int _inputCopyLen;
122
123 /**
124 * Temporary storage for 32-bit values (int, float), as well as length markers
125 * for length-prefixed values.
126 */
127 protected int _pending32;
128
129 /**
130 * Temporary storage for 64-bit values (long, double), secondary storage
131 * for some other things (scale of BigDecimal values)
132 */
133 protected long _pending64;
134
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
 */

/**
 * Creates the parser in its initial state: no current token and
 * {@link #_majorState} set to {@link #MAJOR_INITIAL}.
 *
 * @param ctxt I/O context; passed to the superclass and used to allocate
 *   the {@link #_inputCopy} buffer
 * @param parserFeatures Parser feature flags, passed through to the superclass
 * @param sym Symbol table to use for field names
 */
public NonBlockingJsonParserBase(IOContext ctxt, int parserFeatures,
        ByteQuadsCanonicalizer sym)
{
    super(ctxt, parserFeatures);
    _symbols = sym;
    // We don't need a lot; for most things maximum known a-priori length below 70 bytes
    _inputCopy = ctxt.allocReadIOBuffer(500);

    _currToken = null;
    _majorState = MAJOR_INITIAL;
}
152
153 @Override
154 public ObjectCodec getCodec() {
155 return null;
156 }
157
158 @Override
159 public void setCodec(ObjectCodec c) {
160 throw new UnsupportedOperationException("Can not use ObjectMapper with non-blocking parser");
161 }
162
163 /**
164 * @since 2.9
165 */
166 @Override
167 public boolean canParseAsync() { return true; }
168
169 /*
170 /**********************************************************
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700171 /* Test support
172 /**********************************************************
173 */
174
175 protected ByteQuadsCanonicalizer symbolTableForTests() {
176 return _symbols;
177 }
178
179 /*
180 /**********************************************************
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700181 /* Abstract methods from JsonParser
182 /**********************************************************
183 */
184
185 @Override
186 public abstract int releaseBuffered(OutputStream out) throws IOException;
187
188 @Override
189 public Object getInputSource() {
190 // since input is "pushed", to traditional source...
191 return null;
192 }
193
194 @Override
195 protected void _closeInput() throws IOException {
196 // nothing to do here
197 }
198
199 /*
200 /**********************************************************************
201 /* Overridden methods
202 /**********************************************************************
203 */
204
205 @Override
206 public boolean hasTextCharacters()
207 {
208 if (_currToken == JsonToken.VALUE_STRING) {
209 // yes; is or can be made available efficiently as char[]
210 return _textBuffer.hasTextAsCharacters();
211 }
212 if (_currToken == JsonToken.FIELD_NAME) {
213 // not necessarily; possible but:
214 return _nameCopied;
215 }
216 // other types, no benefit from accessing as char[]
217 return false;
218 }
219
220 /*
221 /**********************************************************************
222 /* Public API, access to token information, text
223 /**********************************************************************
224 */
225
226 /**
227 * Method for accessing textual representation of the current event;
228 * if no current event (before first call to {@link #nextToken}, or
229 * after encountering end-of-input), returns null.
230 * Method can be called for any event.
231 */
232 @Override
233 public String getText() throws IOException
234 {
235 if (_currToken == JsonToken.VALUE_STRING) {
236 return _textBuffer.contentsAsString();
237 }
238 JsonToken t = _currToken;
239 if (t == null || _currToken == JsonToken.NOT_AVAILABLE) { // null only before/after document
240 return null;
241 }
242 if (t == JsonToken.FIELD_NAME) {
243 return _parsingContext.getCurrentName();
244 }
245 if (t.isNumeric()) {
246 // TODO: optimize?
247 return getNumberValue().toString();
248 }
249 return _currToken.asString();
250 }
251
252 @Override
253 public char[] getTextCharacters() throws IOException
254 {
255 switch (currentTokenId()) {
256 case JsonTokenId.ID_STRING:
257 return _textBuffer.getTextBuffer();
258 case JsonTokenId.ID_FIELD_NAME:
259 if (!_nameCopied) {
260 String name = _parsingContext.getCurrentName();
261 int nameLen = name.length();
262 if (_nameCopyBuffer == null) {
263 _nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen);
264 } else if (_nameCopyBuffer.length < nameLen) {
265 _nameCopyBuffer = new char[nameLen];
266 }
267 name.getChars(0, nameLen, _nameCopyBuffer, 0);
268 _nameCopied = true;
269 }
270 return _nameCopyBuffer;
271 case JsonTokenId.ID_NUMBER_INT:
272 case JsonTokenId.ID_NUMBER_FLOAT:
273 return getNumberValue().toString().toCharArray();
274 case JsonTokenId.ID_NO_TOKEN:
275 case JsonTokenId.ID_NOT_AVAILABLE:
276 return null;
277 default:
278 return _currToken.asCharArray();
279 }
280 }
281
282 @Override
283 public int getTextLength() throws IOException
284 {
285 switch (currentTokenId()) {
286 case JsonTokenId.ID_STRING:
287 return _textBuffer.size();
288 case JsonTokenId.ID_FIELD_NAME:
289 return _parsingContext.getCurrentName().length();
290 case JsonTokenId.ID_NUMBER_INT:
291 case JsonTokenId.ID_NUMBER_FLOAT:
292 return getNumberValue().toString().length();
293 case JsonTokenId.ID_NO_TOKEN:
294 case JsonTokenId.ID_NOT_AVAILABLE:
295 return 0; // or throw exception?
296 default:
297 return _currToken.asCharArray().length;
298 }
299 }
300
301 @Override
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700302 public int getTextOffset() throws IOException {
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700303 return 0;
304 }
305
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700306 @Override
307 public int getText(Writer w) throws IOException
308 {
309 if (_currToken == JsonToken.VALUE_STRING) {
310 return _textBuffer.contentsToWriter(w);
311 }
312 if (_currToken == JsonToken.NOT_AVAILABLE) {
313 _reportError("Current token not available: can not call this method");
314 }
315 // otherwise default handling works fine
316 return super.getText(w);
317 }
318
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700319 /*
320 /**********************************************************************
321 /* Public API, access to token information, binary
322 /**********************************************************************
323 */
324
325 @Override
326 public byte[] getBinaryValue(Base64Variant b64variant) throws IOException
327 {
328 if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
329 _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken);
330 }
331 return _binaryValue;
332 }
333
334 @Override
335 public Object getEmbeddedObject() throws IOException
336 {
337 if (_currToken == JsonToken.VALUE_EMBEDDED_OBJECT ) {
338 return _binaryValue;
339 }
340 return null;
341 }
342
343 @Override
344 public int readBinaryValue(Base64Variant b64variant, OutputStream out)
345 throws IOException {
346 if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
347 _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken);
348 }
349 out.write(_binaryValue);
350 return _binaryValue.length;
351 }
352
353 /*
354 /**********************************************************************
355 /* Internal methods, field name parsing
356 /**********************************************************************
357 */
358
359 // Helper method for trying to find specified encoded UTF-8 byte sequence
360 // from symbol table; if successful avoids actual decoding to String
361 protected final String _findDecodedFromSymbols(byte[] inBuf, int inPtr, int len) throws IOException
362 {
363 // First: maybe we already have this name decoded?
364 if (len < 5) {
365 int q = inBuf[inPtr] & 0xFF;
366 if (--len > 0) {
367 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
368 if (--len > 0) {
369 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
370 if (--len > 0) {
371 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
372 }
373 }
374 }
375 _quad1 = q;
376 return _symbols.findName(q);
377 }
378 if (len < 9) {
379 // First quadbyte is easy
380 int q1 = (inBuf[inPtr] & 0xFF) << 8;
381 q1 += (inBuf[++inPtr] & 0xFF);
382 q1 <<= 8;
383 q1 += (inBuf[++inPtr] & 0xFF);
384 q1 <<= 8;
385 q1 += (inBuf[++inPtr] & 0xFF);
386 int q2 = (inBuf[++inPtr] & 0xFF);
387 len -= 5;
388 if (len > 0) {
389 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
390 if (--len > 0) {
391 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
392 if (--len > 0) {
393 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
394 }
395 }
396 }
397 _quad1 = q1;
398 _quad2 = q2;
399 return _symbols.findName(q1, q2);
400 }
401 return _findDecodedLonger(inBuf, inPtr, len);
402 }
403
404 // Method for locating names longer than 8 bytes (in UTF-8)
405 private final String _findDecodedLonger(byte[] inBuf, int inPtr, int len) throws IOException
406 {
407 // first, need enough buffer to store bytes as ints:
408 {
409 int bufLen = (len + 3) >> 2;
410 if (bufLen > _quadBuffer.length) {
411 _quadBuffer = Arrays.copyOf(_quadBuffer, bufLen+4);
412 }
413 }
414 // then decode, full quads first
415 int offset = 0;
416 do {
417 int q = (inBuf[inPtr++] & 0xFF) << 8;
418 q |= inBuf[inPtr++] & 0xFF;
419 q <<= 8;
420 q |= inBuf[inPtr++] & 0xFF;
421 q <<= 8;
422 q |= inBuf[inPtr++] & 0xFF;
423 _quadBuffer[offset++] = q;
424 } while ((len -= 4) > 3);
425 // and then leftovers
426 if (len > 0) {
427 int q = inBuf[inPtr] & 0xFF;
428 if (--len > 0) {
429 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
430 if (--len > 0) {
431 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
432 }
433 }
434 _quadBuffer[offset++] = q;
435 }
436 return _symbols.findName(_quadBuffer, offset);
437 }
438
439 protected final String _addDecodedToSymbols(int len, String name)
440 {
441 if (len < 5) {
Tatu Saloranta81fb43c2017-05-17 18:25:34 -0700442 return _symbols.addName(name, _quad1);
Tatu Saloranta056bf5e2017-05-14 23:59:00 -0700443 }
444 if (len < 9) {
445 return _symbols.addName(name, _quad1, _quad2);
446 }
447 int qlen = (len + 3) >> 2;
448 return _symbols.addName(name, _quadBuffer, qlen);
449 }
450
451 /*
452 /**********************************************************************
453 /* Internal methods, state changes
454 /**********************************************************************
455 */
456
457 /**
458 * Helper method called at point when all input has been exhausted and
459 * input feeder has indicated no more input will be forthcoming.
460 */
461 protected final JsonToken _eofAsNextToken() throws IOException {
462 _majorState = MAJOR_CLOSED;
463 if (!_parsingContext.inRoot()) {
464 _handleEOF();
465 }
466 close();
467 return (_currToken = null);
468 }
469
470 protected final JsonToken _valueComplete(JsonToken t) throws IOException
471 {
472 _majorState = _majorStateAfterValue;
473 _currToken = t;
474 return t;
475 }
476
477 /*
478 /**********************************************************************
479 /* Internal methods, error reporting
480 /**********************************************************************
481 */
482
483 protected void _reportInvalidInitial(int mask) throws JsonParseException {
484 _reportError("Invalid UTF-8 start byte 0x"+Integer.toHexString(mask));
485 }
486
487 protected void _reportInvalidOther(int mask, int ptr) throws JsonParseException {
488 _inputPtr = ptr;
489 _reportError("Invalid UTF-8 middle byte 0x"+Integer.toHexString(mask));
490 }
491}