blob: d6d613ace26786dfda6b2b63055e3493dc2d1c03 [file] [log] [blame]
Tatu Saloranta056bf5e2017-05-14 23:59:00 -07001package com.fasterxml.jackson.core.json.async;
2
3import java.io.*;
4import java.util.Arrays;
5
6import com.fasterxml.jackson.core.*;
7import com.fasterxml.jackson.core.async.NonBlockingInputFeeder;
8import com.fasterxml.jackson.core.base.ParserBase;
9import com.fasterxml.jackson.core.io.IOContext;
10import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
11
12/**
13 * Intermediate base class for non-blocking JSON parsers.
14 */
15public abstract class NonBlockingParserBase<F extends NonBlockingInputFeeder>
16 extends ParserBase
17{
18 /*
19 /**********************************************************************
20 /* Major state constants
21 /**********************************************************************
22 */
23
24 /**
25 * State right after parser has been constructed, before seeing the first byte
26 * to know if there's header.
27 */
28 protected final static int MAJOR_INITIAL = 0;
29
30 /**
31 * State right after parser a root value has been
32 * finished, but next token has not yet been recognized.
33 */
34 protected final static int MAJOR_ROOT = 1;
35
36 protected final static int MAJOR_OBJECT_FIELD = 2;
37 protected final static int MAJOR_OBJECT_VALUE = 3;
38
39 protected final static int MAJOR_ARRAY_ELEMENT = 4;
40
41 /**
42 * State after non-blocking input source has indicated that no more input
43 * is forthcoming AND we have exhausted all the input
44 */
45 protected final static int MAJOR_CLOSED = 5;
46
47 // // // "Sub-states"
48
49 protected final static int MINOR_FIELD_NAME = 1;
50
51 protected final static int MINOR_VALUE_NUMBER = 6;
52
53 protected final static int MINOR_VALUE_STRING = 15;
54
55 protected final static int MINOR_VALUE_TOKEN_NULL = 15;
56 protected final static int MINOR_VALUE_TOKEN_TRUE = 15;
57 protected final static int MINOR_VALUE_TOKEN_FALSE = 15;
58
59 /*
60 /**********************************************************************
61 /* Helper objects, symbols (field names)
62 /**********************************************************************
63 */
64
65 /**
66 * Symbol table that contains field names encountered so far
67 */
68 final protected ByteQuadsCanonicalizer _symbols;
69
70 /**
71 * Temporary buffer used for name parsing.
72 */
73 protected int[] _quadBuffer = NO_INTS;
74
75 /**
76 * Quads used for hash calculation
77 */
78 protected int _quad1, _quad2;
79
80 /*
81 /**********************************************************************
82 /* Additional parsing state
83 /**********************************************************************
84 */
85
86 /**
87 * Current main decoding state
88 */
89 protected int _majorState;
90
91 /**
92 * Addition indicator within state; contextually relevant for just that state
93 */
94 protected int _minorState;
95
96 /**
97 * Value of {@link #_majorState} after completing a scalar value
98 */
99 protected int _majorStateAfterValue;
100
101 /**
102 * Flag that is sent when calling application indicates that there will
103 * be no more input to parse.
104 */
105 protected boolean _endOfInput = false;
106
107 /*
108 /**********************************************************************
109 /* Other buffering
110 /**********************************************************************
111 */
112
113 /**
114 * Temporary buffer for holding content if input not contiguous (but can
115 * fit in buffer)
116 */
117 protected byte[] _inputCopy;
118
119 /**
120 * Number of bytes buffered in <code>_inputCopy</code>
121 */
122 protected int _inputCopyLen;
123
124 /**
125 * Temporary storage for 32-bit values (int, float), as well as length markers
126 * for length-prefixed values.
127 */
128 protected int _pending32;
129
130 /**
131 * Temporary storage for 64-bit values (long, double), secondary storage
132 * for some other things (scale of BigDecimal values)
133 */
134 protected long _pending64;
135
136 /*
137 /**********************************************************************
138 /* Life-cycle
139 /**********************************************************************
140 */
141
142 public NonBlockingParserBase(IOContext ctxt, int parserFeatures,
143 ByteQuadsCanonicalizer sym)
144 {
145 super(ctxt, parserFeatures);
146 _symbols = sym;
147 // We don't need a lot; for most things maximum known a-priori length below 70 bytes
148 _inputCopy = ctxt.allocReadIOBuffer(500);
149
150 _currToken = null;
151 _majorState = MAJOR_INITIAL;
152 }
153
154 @Override
155 public ObjectCodec getCodec() {
156 return null;
157 }
158
159 @Override
160 public void setCodec(ObjectCodec c) {
161 throw new UnsupportedOperationException("Can not use ObjectMapper with non-blocking parser");
162 }
163
164 /**
165 * @since 2.9
166 */
167 @Override
168 public boolean canParseAsync() { return true; }
169
170 /*
171 /**********************************************************
172 /* Abstract methods from JsonParser
173 /**********************************************************
174 */
175
176 @Override
177 public abstract int releaseBuffered(OutputStream out) throws IOException;
178
179 @Override
180 public Object getInputSource() {
181 // since input is "pushed", to traditional source...
182 return null;
183 }
184
185 @Override
186 protected void _closeInput() throws IOException {
187 // nothing to do here
188 }
189
190 /*
191 /**********************************************************************
192 /* Overridden methods
193 /**********************************************************************
194 */
195
196 @Override
197 public boolean hasTextCharacters()
198 {
199 if (_currToken == JsonToken.VALUE_STRING) {
200 // yes; is or can be made available efficiently as char[]
201 return _textBuffer.hasTextAsCharacters();
202 }
203 if (_currToken == JsonToken.FIELD_NAME) {
204 // not necessarily; possible but:
205 return _nameCopied;
206 }
207 // other types, no benefit from accessing as char[]
208 return false;
209 }
210
211 /*
212 /**********************************************************************
213 /* Public API, access to token information, text
214 /**********************************************************************
215 */
216
217 /**
218 * Method for accessing textual representation of the current event;
219 * if no current event (before first call to {@link #nextToken}, or
220 * after encountering end-of-input), returns null.
221 * Method can be called for any event.
222 */
223 @Override
224 public String getText() throws IOException
225 {
226 if (_currToken == JsonToken.VALUE_STRING) {
227 return _textBuffer.contentsAsString();
228 }
229 JsonToken t = _currToken;
230 if (t == null || _currToken == JsonToken.NOT_AVAILABLE) { // null only before/after document
231 return null;
232 }
233 if (t == JsonToken.FIELD_NAME) {
234 return _parsingContext.getCurrentName();
235 }
236 if (t.isNumeric()) {
237 // TODO: optimize?
238 return getNumberValue().toString();
239 }
240 return _currToken.asString();
241 }
242
243 @Override
244 public char[] getTextCharacters() throws IOException
245 {
246 switch (currentTokenId()) {
247 case JsonTokenId.ID_STRING:
248 return _textBuffer.getTextBuffer();
249 case JsonTokenId.ID_FIELD_NAME:
250 if (!_nameCopied) {
251 String name = _parsingContext.getCurrentName();
252 int nameLen = name.length();
253 if (_nameCopyBuffer == null) {
254 _nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen);
255 } else if (_nameCopyBuffer.length < nameLen) {
256 _nameCopyBuffer = new char[nameLen];
257 }
258 name.getChars(0, nameLen, _nameCopyBuffer, 0);
259 _nameCopied = true;
260 }
261 return _nameCopyBuffer;
262 case JsonTokenId.ID_NUMBER_INT:
263 case JsonTokenId.ID_NUMBER_FLOAT:
264 return getNumberValue().toString().toCharArray();
265 case JsonTokenId.ID_NO_TOKEN:
266 case JsonTokenId.ID_NOT_AVAILABLE:
267 return null;
268 default:
269 return _currToken.asCharArray();
270 }
271 }
272
273 @Override
274 public int getTextLength() throws IOException
275 {
276 switch (currentTokenId()) {
277 case JsonTokenId.ID_STRING:
278 return _textBuffer.size();
279 case JsonTokenId.ID_FIELD_NAME:
280 return _parsingContext.getCurrentName().length();
281 case JsonTokenId.ID_NUMBER_INT:
282 case JsonTokenId.ID_NUMBER_FLOAT:
283 return getNumberValue().toString().length();
284 case JsonTokenId.ID_NO_TOKEN:
285 case JsonTokenId.ID_NOT_AVAILABLE:
286 return 0; // or throw exception?
287 default:
288 return _currToken.asCharArray().length;
289 }
290 }
291
292 @Override
293 public int getTextOffset() throws IOException
294 {
295 return 0;
296 }
297
298// public abstract int getText(Writer w) throws IOException;
299
300 /*
301 /**********************************************************************
302 /* Public API, access to token information, binary
303 /**********************************************************************
304 */
305
306 @Override
307 public byte[] getBinaryValue(Base64Variant b64variant) throws IOException
308 {
309 if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
310 _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken);
311 }
312 return _binaryValue;
313 }
314
315 @Override
316 public Object getEmbeddedObject() throws IOException
317 {
318 if (_currToken == JsonToken.VALUE_EMBEDDED_OBJECT ) {
319 return _binaryValue;
320 }
321 return null;
322 }
323
324 @Override
325 public int readBinaryValue(Base64Variant b64variant, OutputStream out)
326 throws IOException {
327 if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
328 _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken);
329 }
330 out.write(_binaryValue);
331 return _binaryValue.length;
332 }
333
334 /*
335 /**********************************************************************
336 /* Internal methods, field name parsing
337 /**********************************************************************
338 */
339
340 // Helper method for trying to find specified encoded UTF-8 byte sequence
341 // from symbol table; if successful avoids actual decoding to String
342 protected final String _findDecodedFromSymbols(byte[] inBuf, int inPtr, int len) throws IOException
343 {
344 // First: maybe we already have this name decoded?
345 if (len < 5) {
346 int q = inBuf[inPtr] & 0xFF;
347 if (--len > 0) {
348 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
349 if (--len > 0) {
350 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
351 if (--len > 0) {
352 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
353 }
354 }
355 }
356 _quad1 = q;
357 return _symbols.findName(q);
358 }
359 if (len < 9) {
360 // First quadbyte is easy
361 int q1 = (inBuf[inPtr] & 0xFF) << 8;
362 q1 += (inBuf[++inPtr] & 0xFF);
363 q1 <<= 8;
364 q1 += (inBuf[++inPtr] & 0xFF);
365 q1 <<= 8;
366 q1 += (inBuf[++inPtr] & 0xFF);
367 int q2 = (inBuf[++inPtr] & 0xFF);
368 len -= 5;
369 if (len > 0) {
370 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
371 if (--len > 0) {
372 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
373 if (--len > 0) {
374 q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
375 }
376 }
377 }
378 _quad1 = q1;
379 _quad2 = q2;
380 return _symbols.findName(q1, q2);
381 }
382 return _findDecodedLonger(inBuf, inPtr, len);
383 }
384
385 // Method for locating names longer than 8 bytes (in UTF-8)
386 private final String _findDecodedLonger(byte[] inBuf, int inPtr, int len) throws IOException
387 {
388 // first, need enough buffer to store bytes as ints:
389 {
390 int bufLen = (len + 3) >> 2;
391 if (bufLen > _quadBuffer.length) {
392 _quadBuffer = Arrays.copyOf(_quadBuffer, bufLen+4);
393 }
394 }
395 // then decode, full quads first
396 int offset = 0;
397 do {
398 int q = (inBuf[inPtr++] & 0xFF) << 8;
399 q |= inBuf[inPtr++] & 0xFF;
400 q <<= 8;
401 q |= inBuf[inPtr++] & 0xFF;
402 q <<= 8;
403 q |= inBuf[inPtr++] & 0xFF;
404 _quadBuffer[offset++] = q;
405 } while ((len -= 4) > 3);
406 // and then leftovers
407 if (len > 0) {
408 int q = inBuf[inPtr] & 0xFF;
409 if (--len > 0) {
410 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
411 if (--len > 0) {
412 q = (q << 8) + (inBuf[++inPtr] & 0xFF);
413 }
414 }
415 _quadBuffer[offset++] = q;
416 }
417 return _symbols.findName(_quadBuffer, offset);
418 }
419
420 protected final String _addDecodedToSymbols(int len, String name)
421 {
422 if (len < 5) {
423 return _symbols.addName(name, _quad1, 0);
424 }
425 if (len < 9) {
426 return _symbols.addName(name, _quad1, _quad2);
427 }
428 int qlen = (len + 3) >> 2;
429 return _symbols.addName(name, _quadBuffer, qlen);
430 }
431
432 /*
433 /**********************************************************************
434 /* Internal methods, state changes
435 /**********************************************************************
436 */
437
438 /**
439 * Helper method called at point when all input has been exhausted and
440 * input feeder has indicated no more input will be forthcoming.
441 */
442 protected final JsonToken _eofAsNextToken() throws IOException {
443 _majorState = MAJOR_CLOSED;
444 if (!_parsingContext.inRoot()) {
445 _handleEOF();
446 }
447 close();
448 return (_currToken = null);
449 }
450
451 protected final JsonToken _valueComplete(JsonToken t) throws IOException
452 {
453 _majorState = _majorStateAfterValue;
454 _currToken = t;
455 return t;
456 }
457
458 /*
459 /**********************************************************************
460 /* Internal methods, error reporting
461 /**********************************************************************
462 */
463
464 protected void _reportInvalidInitial(int mask) throws JsonParseException {
465 _reportError("Invalid UTF-8 start byte 0x"+Integer.toHexString(mask));
466 }
467
468 protected void _reportInvalidOther(int mask, int ptr) throws JsonParseException {
469 _inputPtr = ptr;
470 _reportError("Invalid UTF-8 middle byte 0x"+Integer.toHexString(mask));
471 }
472}