Tatu Saloranta | 056bf5e | 2017-05-14 23:59:00 -0700 | [diff] [blame] | 1 | package com.fasterxml.jackson.core.json.async; |
| 2 | |
| 3 | import java.io.*; |
| 4 | import java.util.Arrays; |
| 5 | |
| 6 | import com.fasterxml.jackson.core.*; |
Tatu Saloranta | 056bf5e | 2017-05-14 23:59:00 -0700 | [diff] [blame] | 7 | import com.fasterxml.jackson.core.base.ParserBase; |
| 8 | import com.fasterxml.jackson.core.io.IOContext; |
| 9 | import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer; |
| 10 | |
| 11 | /** |
| 12 | * Intermediate base class for non-blocking JSON parsers. |
| 13 | */ |
Tatu Saloranta | 81fb43c | 2017-05-17 18:25:34 -0700 | [diff] [blame^] | 14 | public abstract class NonBlockingJsonParserBase |
Tatu Saloranta | 056bf5e | 2017-05-14 23:59:00 -0700 | [diff] [blame] | 15 | extends ParserBase |
| 16 | { |
| 17 | /* |
| 18 | /********************************************************************** |
| 19 | /* Major state constants |
| 20 | /********************************************************************** |
| 21 | */ |
| 22 | |
| 23 | /** |
| 24 | * State right after parser has been constructed, before seeing the first byte |
| 25 | * to know if there's header. |
| 26 | */ |
| 27 | protected final static int MAJOR_INITIAL = 0; |
| 28 | |
/**
 * State right after the parser has finished a root value,
 * but before the next token has been recognized.
 */
| 33 | protected final static int MAJOR_ROOT = 1; |
| 34 | |
| 35 | protected final static int MAJOR_OBJECT_FIELD = 2; |
| 36 | protected final static int MAJOR_OBJECT_VALUE = 3; |
| 37 | |
| 38 | protected final static int MAJOR_ARRAY_ELEMENT = 4; |
| 39 | |
| 40 | /** |
| 41 | * State after non-blocking input source has indicated that no more input |
| 42 | * is forthcoming AND we have exhausted all the input |
| 43 | */ |
| 44 | protected final static int MAJOR_CLOSED = 5; |
| 45 | |
| 46 | // // // "Sub-states" |
| 47 | |
| 48 | protected final static int MINOR_FIELD_NAME = 1; |
| 49 | |
| 50 | protected final static int MINOR_VALUE_NUMBER = 6; |
| 51 | |
| 52 | protected final static int MINOR_VALUE_STRING = 15; |
| 53 | |
| 54 | protected final static int MINOR_VALUE_TOKEN_NULL = 15; |
| 55 | protected final static int MINOR_VALUE_TOKEN_TRUE = 15; |
| 56 | protected final static int MINOR_VALUE_TOKEN_FALSE = 15; |
| 57 | |
| 58 | /* |
| 59 | /********************************************************************** |
| 60 | /* Helper objects, symbols (field names) |
| 61 | /********************************************************************** |
| 62 | */ |
| 63 | |
| 64 | /** |
| 65 | * Symbol table that contains field names encountered so far |
| 66 | */ |
| 67 | final protected ByteQuadsCanonicalizer _symbols; |
| 68 | |
| 69 | /** |
| 70 | * Temporary buffer used for name parsing. |
| 71 | */ |
| 72 | protected int[] _quadBuffer = NO_INTS; |
| 73 | |
| 74 | /** |
| 75 | * Quads used for hash calculation |
| 76 | */ |
| 77 | protected int _quad1, _quad2; |
| 78 | |
| 79 | /* |
| 80 | /********************************************************************** |
| 81 | /* Additional parsing state |
| 82 | /********************************************************************** |
| 83 | */ |
| 84 | |
| 85 | /** |
| 86 | * Current main decoding state |
| 87 | */ |
| 88 | protected int _majorState; |
| 89 | |
/**
 * Additional indicator within state; contextually relevant for just that state
 */
| 93 | protected int _minorState; |
| 94 | |
| 95 | /** |
| 96 | * Value of {@link #_majorState} after completing a scalar value |
| 97 | */ |
| 98 | protected int _majorStateAfterValue; |
| 99 | |
/**
 * Flag that is set when the calling application indicates that there will
 * be no more input to parse.
 */
| 104 | protected boolean _endOfInput = false; |
| 105 | |
| 106 | /* |
| 107 | /********************************************************************** |
| 108 | /* Other buffering |
| 109 | /********************************************************************** |
| 110 | */ |
| 111 | |
| 112 | /** |
| 113 | * Temporary buffer for holding content if input not contiguous (but can |
| 114 | * fit in buffer) |
| 115 | */ |
| 116 | protected byte[] _inputCopy; |
| 117 | |
| 118 | /** |
| 119 | * Number of bytes buffered in <code>_inputCopy</code> |
| 120 | */ |
| 121 | protected int _inputCopyLen; |
| 122 | |
| 123 | /** |
| 124 | * Temporary storage for 32-bit values (int, float), as well as length markers |
| 125 | * for length-prefixed values. |
| 126 | */ |
| 127 | protected int _pending32; |
| 128 | |
| 129 | /** |
| 130 | * Temporary storage for 64-bit values (long, double), secondary storage |
| 131 | * for some other things (scale of BigDecimal values) |
| 132 | */ |
| 133 | protected long _pending64; |
| 134 | |
| 135 | /* |
| 136 | /********************************************************************** |
| 137 | /* Life-cycle |
| 138 | /********************************************************************** |
| 139 | */ |
| 140 | |
Tatu Saloranta | 81fb43c | 2017-05-17 18:25:34 -0700 | [diff] [blame^] | 141 | public NonBlockingJsonParserBase(IOContext ctxt, int parserFeatures, |
Tatu Saloranta | 056bf5e | 2017-05-14 23:59:00 -0700 | [diff] [blame] | 142 | ByteQuadsCanonicalizer sym) |
| 143 | { |
| 144 | super(ctxt, parserFeatures); |
| 145 | _symbols = sym; |
| 146 | // We don't need a lot; for most things maximum known a-priori length below 70 bytes |
| 147 | _inputCopy = ctxt.allocReadIOBuffer(500); |
| 148 | |
| 149 | _currToken = null; |
| 150 | _majorState = MAJOR_INITIAL; |
| 151 | } |
| 152 | |
| 153 | @Override |
| 154 | public ObjectCodec getCodec() { |
| 155 | return null; |
| 156 | } |
| 157 | |
| 158 | @Override |
| 159 | public void setCodec(ObjectCodec c) { |
| 160 | throw new UnsupportedOperationException("Can not use ObjectMapper with non-blocking parser"); |
| 161 | } |
| 162 | |
| 163 | /** |
| 164 | * @since 2.9 |
| 165 | */ |
| 166 | @Override |
| 167 | public boolean canParseAsync() { return true; } |
| 168 | |
| 169 | /* |
| 170 | /********************************************************** |
Tatu Saloranta | 81fb43c | 2017-05-17 18:25:34 -0700 | [diff] [blame^] | 171 | /* Test support |
| 172 | /********************************************************** |
| 173 | */ |
| 174 | |
| 175 | protected ByteQuadsCanonicalizer symbolTableForTests() { |
| 176 | return _symbols; |
| 177 | } |
| 178 | |
| 179 | /* |
| 180 | /********************************************************** |
Tatu Saloranta | 056bf5e | 2017-05-14 23:59:00 -0700 | [diff] [blame] | 181 | /* Abstract methods from JsonParser |
| 182 | /********************************************************** |
| 183 | */ |
| 184 | |
| 185 | @Override |
| 186 | public abstract int releaseBuffered(OutputStream out) throws IOException; |
| 187 | |
| 188 | @Override |
| 189 | public Object getInputSource() { |
| 190 | // since input is "pushed", to traditional source... |
| 191 | return null; |
| 192 | } |
| 193 | |
| 194 | @Override |
| 195 | protected void _closeInput() throws IOException { |
| 196 | // nothing to do here |
| 197 | } |
| 198 | |
| 199 | /* |
| 200 | /********************************************************************** |
| 201 | /* Overridden methods |
| 202 | /********************************************************************** |
| 203 | */ |
| 204 | |
| 205 | @Override |
| 206 | public boolean hasTextCharacters() |
| 207 | { |
| 208 | if (_currToken == JsonToken.VALUE_STRING) { |
| 209 | // yes; is or can be made available efficiently as char[] |
| 210 | return _textBuffer.hasTextAsCharacters(); |
| 211 | } |
| 212 | if (_currToken == JsonToken.FIELD_NAME) { |
| 213 | // not necessarily; possible but: |
| 214 | return _nameCopied; |
| 215 | } |
| 216 | // other types, no benefit from accessing as char[] |
| 217 | return false; |
| 218 | } |
| 219 | |
| 220 | /* |
| 221 | /********************************************************************** |
| 222 | /* Public API, access to token information, text |
| 223 | /********************************************************************** |
| 224 | */ |
| 225 | |
| 226 | /** |
| 227 | * Method for accessing textual representation of the current event; |
| 228 | * if no current event (before first call to {@link #nextToken}, or |
| 229 | * after encountering end-of-input), returns null. |
| 230 | * Method can be called for any event. |
| 231 | */ |
| 232 | @Override |
| 233 | public String getText() throws IOException |
| 234 | { |
| 235 | if (_currToken == JsonToken.VALUE_STRING) { |
| 236 | return _textBuffer.contentsAsString(); |
| 237 | } |
| 238 | JsonToken t = _currToken; |
| 239 | if (t == null || _currToken == JsonToken.NOT_AVAILABLE) { // null only before/after document |
| 240 | return null; |
| 241 | } |
| 242 | if (t == JsonToken.FIELD_NAME) { |
| 243 | return _parsingContext.getCurrentName(); |
| 244 | } |
| 245 | if (t.isNumeric()) { |
| 246 | // TODO: optimize? |
| 247 | return getNumberValue().toString(); |
| 248 | } |
| 249 | return _currToken.asString(); |
| 250 | } |
| 251 | |
| 252 | @Override |
| 253 | public char[] getTextCharacters() throws IOException |
| 254 | { |
| 255 | switch (currentTokenId()) { |
| 256 | case JsonTokenId.ID_STRING: |
| 257 | return _textBuffer.getTextBuffer(); |
| 258 | case JsonTokenId.ID_FIELD_NAME: |
| 259 | if (!_nameCopied) { |
| 260 | String name = _parsingContext.getCurrentName(); |
| 261 | int nameLen = name.length(); |
| 262 | if (_nameCopyBuffer == null) { |
| 263 | _nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen); |
| 264 | } else if (_nameCopyBuffer.length < nameLen) { |
| 265 | _nameCopyBuffer = new char[nameLen]; |
| 266 | } |
| 267 | name.getChars(0, nameLen, _nameCopyBuffer, 0); |
| 268 | _nameCopied = true; |
| 269 | } |
| 270 | return _nameCopyBuffer; |
| 271 | case JsonTokenId.ID_NUMBER_INT: |
| 272 | case JsonTokenId.ID_NUMBER_FLOAT: |
| 273 | return getNumberValue().toString().toCharArray(); |
| 274 | case JsonTokenId.ID_NO_TOKEN: |
| 275 | case JsonTokenId.ID_NOT_AVAILABLE: |
| 276 | return null; |
| 277 | default: |
| 278 | return _currToken.asCharArray(); |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | @Override |
| 283 | public int getTextLength() throws IOException |
| 284 | { |
| 285 | switch (currentTokenId()) { |
| 286 | case JsonTokenId.ID_STRING: |
| 287 | return _textBuffer.size(); |
| 288 | case JsonTokenId.ID_FIELD_NAME: |
| 289 | return _parsingContext.getCurrentName().length(); |
| 290 | case JsonTokenId.ID_NUMBER_INT: |
| 291 | case JsonTokenId.ID_NUMBER_FLOAT: |
| 292 | return getNumberValue().toString().length(); |
| 293 | case JsonTokenId.ID_NO_TOKEN: |
| 294 | case JsonTokenId.ID_NOT_AVAILABLE: |
| 295 | return 0; // or throw exception? |
| 296 | default: |
| 297 | return _currToken.asCharArray().length; |
| 298 | } |
| 299 | } |
| 300 | |
| 301 | @Override |
Tatu Saloranta | 81fb43c | 2017-05-17 18:25:34 -0700 | [diff] [blame^] | 302 | public int getTextOffset() throws IOException { |
Tatu Saloranta | 056bf5e | 2017-05-14 23:59:00 -0700 | [diff] [blame] | 303 | return 0; |
| 304 | } |
| 305 | |
Tatu Saloranta | 81fb43c | 2017-05-17 18:25:34 -0700 | [diff] [blame^] | 306 | @Override |
| 307 | public int getText(Writer w) throws IOException |
| 308 | { |
| 309 | if (_currToken == JsonToken.VALUE_STRING) { |
| 310 | return _textBuffer.contentsToWriter(w); |
| 311 | } |
| 312 | if (_currToken == JsonToken.NOT_AVAILABLE) { |
| 313 | _reportError("Current token not available: can not call this method"); |
| 314 | } |
| 315 | // otherwise default handling works fine |
| 316 | return super.getText(w); |
| 317 | } |
| 318 | |
Tatu Saloranta | 056bf5e | 2017-05-14 23:59:00 -0700 | [diff] [blame] | 319 | /* |
| 320 | /********************************************************************** |
| 321 | /* Public API, access to token information, binary |
| 322 | /********************************************************************** |
| 323 | */ |
| 324 | |
| 325 | @Override |
| 326 | public byte[] getBinaryValue(Base64Variant b64variant) throws IOException |
| 327 | { |
| 328 | if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) { |
| 329 | _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken); |
| 330 | } |
| 331 | return _binaryValue; |
| 332 | } |
| 333 | |
| 334 | @Override |
| 335 | public Object getEmbeddedObject() throws IOException |
| 336 | { |
| 337 | if (_currToken == JsonToken.VALUE_EMBEDDED_OBJECT ) { |
| 338 | return _binaryValue; |
| 339 | } |
| 340 | return null; |
| 341 | } |
| 342 | |
| 343 | @Override |
| 344 | public int readBinaryValue(Base64Variant b64variant, OutputStream out) |
| 345 | throws IOException { |
| 346 | if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) { |
| 347 | _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken); |
| 348 | } |
| 349 | out.write(_binaryValue); |
| 350 | return _binaryValue.length; |
| 351 | } |
| 352 | |
| 353 | /* |
| 354 | /********************************************************************** |
| 355 | /* Internal methods, field name parsing |
| 356 | /********************************************************************** |
| 357 | */ |
| 358 | |
| 359 | // Helper method for trying to find specified encoded UTF-8 byte sequence |
| 360 | // from symbol table; if successful avoids actual decoding to String |
| 361 | protected final String _findDecodedFromSymbols(byte[] inBuf, int inPtr, int len) throws IOException |
| 362 | { |
| 363 | // First: maybe we already have this name decoded? |
| 364 | if (len < 5) { |
| 365 | int q = inBuf[inPtr] & 0xFF; |
| 366 | if (--len > 0) { |
| 367 | q = (q << 8) + (inBuf[++inPtr] & 0xFF); |
| 368 | if (--len > 0) { |
| 369 | q = (q << 8) + (inBuf[++inPtr] & 0xFF); |
| 370 | if (--len > 0) { |
| 371 | q = (q << 8) + (inBuf[++inPtr] & 0xFF); |
| 372 | } |
| 373 | } |
| 374 | } |
| 375 | _quad1 = q; |
| 376 | return _symbols.findName(q); |
| 377 | } |
| 378 | if (len < 9) { |
| 379 | // First quadbyte is easy |
| 380 | int q1 = (inBuf[inPtr] & 0xFF) << 8; |
| 381 | q1 += (inBuf[++inPtr] & 0xFF); |
| 382 | q1 <<= 8; |
| 383 | q1 += (inBuf[++inPtr] & 0xFF); |
| 384 | q1 <<= 8; |
| 385 | q1 += (inBuf[++inPtr] & 0xFF); |
| 386 | int q2 = (inBuf[++inPtr] & 0xFF); |
| 387 | len -= 5; |
| 388 | if (len > 0) { |
| 389 | q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF); |
| 390 | if (--len > 0) { |
| 391 | q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF); |
| 392 | if (--len > 0) { |
| 393 | q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF); |
| 394 | } |
| 395 | } |
| 396 | } |
| 397 | _quad1 = q1; |
| 398 | _quad2 = q2; |
| 399 | return _symbols.findName(q1, q2); |
| 400 | } |
| 401 | return _findDecodedLonger(inBuf, inPtr, len); |
| 402 | } |
| 403 | |
| 404 | // Method for locating names longer than 8 bytes (in UTF-8) |
| 405 | private final String _findDecodedLonger(byte[] inBuf, int inPtr, int len) throws IOException |
| 406 | { |
| 407 | // first, need enough buffer to store bytes as ints: |
| 408 | { |
| 409 | int bufLen = (len + 3) >> 2; |
| 410 | if (bufLen > _quadBuffer.length) { |
| 411 | _quadBuffer = Arrays.copyOf(_quadBuffer, bufLen+4); |
| 412 | } |
| 413 | } |
| 414 | // then decode, full quads first |
| 415 | int offset = 0; |
| 416 | do { |
| 417 | int q = (inBuf[inPtr++] & 0xFF) << 8; |
| 418 | q |= inBuf[inPtr++] & 0xFF; |
| 419 | q <<= 8; |
| 420 | q |= inBuf[inPtr++] & 0xFF; |
| 421 | q <<= 8; |
| 422 | q |= inBuf[inPtr++] & 0xFF; |
| 423 | _quadBuffer[offset++] = q; |
| 424 | } while ((len -= 4) > 3); |
| 425 | // and then leftovers |
| 426 | if (len > 0) { |
| 427 | int q = inBuf[inPtr] & 0xFF; |
| 428 | if (--len > 0) { |
| 429 | q = (q << 8) + (inBuf[++inPtr] & 0xFF); |
| 430 | if (--len > 0) { |
| 431 | q = (q << 8) + (inBuf[++inPtr] & 0xFF); |
| 432 | } |
| 433 | } |
| 434 | _quadBuffer[offset++] = q; |
| 435 | } |
| 436 | return _symbols.findName(_quadBuffer, offset); |
| 437 | } |
| 438 | |
| 439 | protected final String _addDecodedToSymbols(int len, String name) |
| 440 | { |
| 441 | if (len < 5) { |
Tatu Saloranta | 81fb43c | 2017-05-17 18:25:34 -0700 | [diff] [blame^] | 442 | return _symbols.addName(name, _quad1); |
Tatu Saloranta | 056bf5e | 2017-05-14 23:59:00 -0700 | [diff] [blame] | 443 | } |
| 444 | if (len < 9) { |
| 445 | return _symbols.addName(name, _quad1, _quad2); |
| 446 | } |
| 447 | int qlen = (len + 3) >> 2; |
| 448 | return _symbols.addName(name, _quadBuffer, qlen); |
| 449 | } |
| 450 | |
| 451 | /* |
| 452 | /********************************************************************** |
| 453 | /* Internal methods, state changes |
| 454 | /********************************************************************** |
| 455 | */ |
| 456 | |
| 457 | /** |
| 458 | * Helper method called at point when all input has been exhausted and |
| 459 | * input feeder has indicated no more input will be forthcoming. |
| 460 | */ |
| 461 | protected final JsonToken _eofAsNextToken() throws IOException { |
| 462 | _majorState = MAJOR_CLOSED; |
| 463 | if (!_parsingContext.inRoot()) { |
| 464 | _handleEOF(); |
| 465 | } |
| 466 | close(); |
| 467 | return (_currToken = null); |
| 468 | } |
| 469 | |
| 470 | protected final JsonToken _valueComplete(JsonToken t) throws IOException |
| 471 | { |
| 472 | _majorState = _majorStateAfterValue; |
| 473 | _currToken = t; |
| 474 | return t; |
| 475 | } |
| 476 | |
| 477 | /* |
| 478 | /********************************************************************** |
| 479 | /* Internal methods, error reporting |
| 480 | /********************************************************************** |
| 481 | */ |
| 482 | |
| 483 | protected void _reportInvalidInitial(int mask) throws JsonParseException { |
| 484 | _reportError("Invalid UTF-8 start byte 0x"+Integer.toHexString(mask)); |
| 485 | } |
| 486 | |
| 487 | protected void _reportInvalidOther(int mask, int ptr) throws JsonParseException { |
| 488 | _inputPtr = ptr; |
| 489 | _reportError("Invalid UTF-8 middle byte 0x"+Integer.toHexString(mask)); |
| 490 | } |
| 491 | } |