Tatu Saloranta | 056bf5e | 2017-05-14 23:59:00 -0700 | [diff] [blame^] | 1 | package com.fasterxml.jackson.core.json.async; |
| 2 | |
| 3 | import java.io.*; |
| 4 | import java.util.Arrays; |
| 5 | |
| 6 | import com.fasterxml.jackson.core.*; |
| 7 | import com.fasterxml.jackson.core.async.NonBlockingInputFeeder; |
| 8 | import com.fasterxml.jackson.core.base.ParserBase; |
| 9 | import com.fasterxml.jackson.core.io.IOContext; |
| 10 | import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer; |
| 11 | |
| 12 | /** |
| 13 | * Intermediate base class for non-blocking JSON parsers. |
| 14 | */ |
| 15 | public abstract class NonBlockingParserBase<F extends NonBlockingInputFeeder> |
| 16 | extends ParserBase |
| 17 | { |
| 18 | /* |
| 19 | /********************************************************************** |
| 20 | /* Major state constants |
| 21 | /********************************************************************** |
| 22 | */ |
| 23 | |
| 24 | /** |
| 25 | * State right after parser has been constructed, before seeing the first byte |
| 26 | * to know if there's header. |
| 27 | */ |
| 28 | protected final static int MAJOR_INITIAL = 0; |
| 29 | |
| 30 | /** |
| 31 | * State right after parser a root value has been |
| 32 | * finished, but next token has not yet been recognized. |
| 33 | */ |
| 34 | protected final static int MAJOR_ROOT = 1; |
| 35 | |
| 36 | protected final static int MAJOR_OBJECT_FIELD = 2; |
| 37 | protected final static int MAJOR_OBJECT_VALUE = 3; |
| 38 | |
| 39 | protected final static int MAJOR_ARRAY_ELEMENT = 4; |
| 40 | |
| 41 | /** |
| 42 | * State after non-blocking input source has indicated that no more input |
| 43 | * is forthcoming AND we have exhausted all the input |
| 44 | */ |
| 45 | protected final static int MAJOR_CLOSED = 5; |
| 46 | |
| 47 | // // // "Sub-states" |
| 48 | |
| 49 | protected final static int MINOR_FIELD_NAME = 1; |
| 50 | |
| 51 | protected final static int MINOR_VALUE_NUMBER = 6; |
| 52 | |
| 53 | protected final static int MINOR_VALUE_STRING = 15; |
| 54 | |
| 55 | protected final static int MINOR_VALUE_TOKEN_NULL = 15; |
| 56 | protected final static int MINOR_VALUE_TOKEN_TRUE = 15; |
| 57 | protected final static int MINOR_VALUE_TOKEN_FALSE = 15; |
| 58 | |
| 59 | /* |
| 60 | /********************************************************************** |
| 61 | /* Helper objects, symbols (field names) |
| 62 | /********************************************************************** |
| 63 | */ |
| 64 | |
| 65 | /** |
| 66 | * Symbol table that contains field names encountered so far |
| 67 | */ |
| 68 | final protected ByteQuadsCanonicalizer _symbols; |
| 69 | |
| 70 | /** |
| 71 | * Temporary buffer used for name parsing. |
| 72 | */ |
| 73 | protected int[] _quadBuffer = NO_INTS; |
| 74 | |
| 75 | /** |
| 76 | * Quads used for hash calculation |
| 77 | */ |
| 78 | protected int _quad1, _quad2; |
| 79 | |
| 80 | /* |
| 81 | /********************************************************************** |
| 82 | /* Additional parsing state |
| 83 | /********************************************************************** |
| 84 | */ |
| 85 | |
| 86 | /** |
| 87 | * Current main decoding state |
| 88 | */ |
| 89 | protected int _majorState; |
| 90 | |
| 91 | /** |
| 92 | * Addition indicator within state; contextually relevant for just that state |
| 93 | */ |
| 94 | protected int _minorState; |
| 95 | |
| 96 | /** |
| 97 | * Value of {@link #_majorState} after completing a scalar value |
| 98 | */ |
| 99 | protected int _majorStateAfterValue; |
| 100 | |
| 101 | /** |
| 102 | * Flag that is sent when calling application indicates that there will |
| 103 | * be no more input to parse. |
| 104 | */ |
| 105 | protected boolean _endOfInput = false; |
| 106 | |
| 107 | /* |
| 108 | /********************************************************************** |
| 109 | /* Other buffering |
| 110 | /********************************************************************** |
| 111 | */ |
| 112 | |
| 113 | /** |
| 114 | * Temporary buffer for holding content if input not contiguous (but can |
| 115 | * fit in buffer) |
| 116 | */ |
| 117 | protected byte[] _inputCopy; |
| 118 | |
| 119 | /** |
| 120 | * Number of bytes buffered in <code>_inputCopy</code> |
| 121 | */ |
| 122 | protected int _inputCopyLen; |
| 123 | |
| 124 | /** |
| 125 | * Temporary storage for 32-bit values (int, float), as well as length markers |
| 126 | * for length-prefixed values. |
| 127 | */ |
| 128 | protected int _pending32; |
| 129 | |
| 130 | /** |
| 131 | * Temporary storage for 64-bit values (long, double), secondary storage |
| 132 | * for some other things (scale of BigDecimal values) |
| 133 | */ |
| 134 | protected long _pending64; |
| 135 | |
| 136 | /* |
| 137 | /********************************************************************** |
| 138 | /* Life-cycle |
| 139 | /********************************************************************** |
| 140 | */ |
| 141 | |
| 142 | public NonBlockingParserBase(IOContext ctxt, int parserFeatures, |
| 143 | ByteQuadsCanonicalizer sym) |
| 144 | { |
| 145 | super(ctxt, parserFeatures); |
| 146 | _symbols = sym; |
| 147 | // We don't need a lot; for most things maximum known a-priori length below 70 bytes |
| 148 | _inputCopy = ctxt.allocReadIOBuffer(500); |
| 149 | |
| 150 | _currToken = null; |
| 151 | _majorState = MAJOR_INITIAL; |
| 152 | } |
| 153 | |
| 154 | @Override |
| 155 | public ObjectCodec getCodec() { |
| 156 | return null; |
| 157 | } |
| 158 | |
| 159 | @Override |
| 160 | public void setCodec(ObjectCodec c) { |
| 161 | throw new UnsupportedOperationException("Can not use ObjectMapper with non-blocking parser"); |
| 162 | } |
| 163 | |
| 164 | /** |
| 165 | * @since 2.9 |
| 166 | */ |
| 167 | @Override |
| 168 | public boolean canParseAsync() { return true; } |
| 169 | |
| 170 | /* |
| 171 | /********************************************************** |
| 172 | /* Abstract methods from JsonParser |
| 173 | /********************************************************** |
| 174 | */ |
| 175 | |
| 176 | @Override |
| 177 | public abstract int releaseBuffered(OutputStream out) throws IOException; |
| 178 | |
| 179 | @Override |
| 180 | public Object getInputSource() { |
| 181 | // since input is "pushed", to traditional source... |
| 182 | return null; |
| 183 | } |
| 184 | |
| 185 | @Override |
| 186 | protected void _closeInput() throws IOException { |
| 187 | // nothing to do here |
| 188 | } |
| 189 | |
| 190 | /* |
| 191 | /********************************************************************** |
| 192 | /* Overridden methods |
| 193 | /********************************************************************** |
| 194 | */ |
| 195 | |
| 196 | @Override |
| 197 | public boolean hasTextCharacters() |
| 198 | { |
| 199 | if (_currToken == JsonToken.VALUE_STRING) { |
| 200 | // yes; is or can be made available efficiently as char[] |
| 201 | return _textBuffer.hasTextAsCharacters(); |
| 202 | } |
| 203 | if (_currToken == JsonToken.FIELD_NAME) { |
| 204 | // not necessarily; possible but: |
| 205 | return _nameCopied; |
| 206 | } |
| 207 | // other types, no benefit from accessing as char[] |
| 208 | return false; |
| 209 | } |
| 210 | |
| 211 | /* |
| 212 | /********************************************************************** |
| 213 | /* Public API, access to token information, text |
| 214 | /********************************************************************** |
| 215 | */ |
| 216 | |
| 217 | /** |
| 218 | * Method for accessing textual representation of the current event; |
| 219 | * if no current event (before first call to {@link #nextToken}, or |
| 220 | * after encountering end-of-input), returns null. |
| 221 | * Method can be called for any event. |
| 222 | */ |
| 223 | @Override |
| 224 | public String getText() throws IOException |
| 225 | { |
| 226 | if (_currToken == JsonToken.VALUE_STRING) { |
| 227 | return _textBuffer.contentsAsString(); |
| 228 | } |
| 229 | JsonToken t = _currToken; |
| 230 | if (t == null || _currToken == JsonToken.NOT_AVAILABLE) { // null only before/after document |
| 231 | return null; |
| 232 | } |
| 233 | if (t == JsonToken.FIELD_NAME) { |
| 234 | return _parsingContext.getCurrentName(); |
| 235 | } |
| 236 | if (t.isNumeric()) { |
| 237 | // TODO: optimize? |
| 238 | return getNumberValue().toString(); |
| 239 | } |
| 240 | return _currToken.asString(); |
| 241 | } |
| 242 | |
| 243 | @Override |
| 244 | public char[] getTextCharacters() throws IOException |
| 245 | { |
| 246 | switch (currentTokenId()) { |
| 247 | case JsonTokenId.ID_STRING: |
| 248 | return _textBuffer.getTextBuffer(); |
| 249 | case JsonTokenId.ID_FIELD_NAME: |
| 250 | if (!_nameCopied) { |
| 251 | String name = _parsingContext.getCurrentName(); |
| 252 | int nameLen = name.length(); |
| 253 | if (_nameCopyBuffer == null) { |
| 254 | _nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen); |
| 255 | } else if (_nameCopyBuffer.length < nameLen) { |
| 256 | _nameCopyBuffer = new char[nameLen]; |
| 257 | } |
| 258 | name.getChars(0, nameLen, _nameCopyBuffer, 0); |
| 259 | _nameCopied = true; |
| 260 | } |
| 261 | return _nameCopyBuffer; |
| 262 | case JsonTokenId.ID_NUMBER_INT: |
| 263 | case JsonTokenId.ID_NUMBER_FLOAT: |
| 264 | return getNumberValue().toString().toCharArray(); |
| 265 | case JsonTokenId.ID_NO_TOKEN: |
| 266 | case JsonTokenId.ID_NOT_AVAILABLE: |
| 267 | return null; |
| 268 | default: |
| 269 | return _currToken.asCharArray(); |
| 270 | } |
| 271 | } |
| 272 | |
| 273 | @Override |
| 274 | public int getTextLength() throws IOException |
| 275 | { |
| 276 | switch (currentTokenId()) { |
| 277 | case JsonTokenId.ID_STRING: |
| 278 | return _textBuffer.size(); |
| 279 | case JsonTokenId.ID_FIELD_NAME: |
| 280 | return _parsingContext.getCurrentName().length(); |
| 281 | case JsonTokenId.ID_NUMBER_INT: |
| 282 | case JsonTokenId.ID_NUMBER_FLOAT: |
| 283 | return getNumberValue().toString().length(); |
| 284 | case JsonTokenId.ID_NO_TOKEN: |
| 285 | case JsonTokenId.ID_NOT_AVAILABLE: |
| 286 | return 0; // or throw exception? |
| 287 | default: |
| 288 | return _currToken.asCharArray().length; |
| 289 | } |
| 290 | } |
| 291 | |
| 292 | @Override |
| 293 | public int getTextOffset() throws IOException |
| 294 | { |
| 295 | return 0; |
| 296 | } |
| 297 | |
| 298 | // public abstract int getText(Writer w) throws IOException; |
| 299 | |
| 300 | /* |
| 301 | /********************************************************************** |
| 302 | /* Public API, access to token information, binary |
| 303 | /********************************************************************** |
| 304 | */ |
| 305 | |
| 306 | @Override |
| 307 | public byte[] getBinaryValue(Base64Variant b64variant) throws IOException |
| 308 | { |
| 309 | if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) { |
| 310 | _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken); |
| 311 | } |
| 312 | return _binaryValue; |
| 313 | } |
| 314 | |
| 315 | @Override |
| 316 | public Object getEmbeddedObject() throws IOException |
| 317 | { |
| 318 | if (_currToken == JsonToken.VALUE_EMBEDDED_OBJECT ) { |
| 319 | return _binaryValue; |
| 320 | } |
| 321 | return null; |
| 322 | } |
| 323 | |
| 324 | @Override |
| 325 | public int readBinaryValue(Base64Variant b64variant, OutputStream out) |
| 326 | throws IOException { |
| 327 | if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) { |
| 328 | _reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken); |
| 329 | } |
| 330 | out.write(_binaryValue); |
| 331 | return _binaryValue.length; |
| 332 | } |
| 333 | |
| 334 | /* |
| 335 | /********************************************************************** |
| 336 | /* Internal methods, field name parsing |
| 337 | /********************************************************************** |
| 338 | */ |
| 339 | |
| 340 | // Helper method for trying to find specified encoded UTF-8 byte sequence |
| 341 | // from symbol table; if successful avoids actual decoding to String |
| 342 | protected final String _findDecodedFromSymbols(byte[] inBuf, int inPtr, int len) throws IOException |
| 343 | { |
| 344 | // First: maybe we already have this name decoded? |
| 345 | if (len < 5) { |
| 346 | int q = inBuf[inPtr] & 0xFF; |
| 347 | if (--len > 0) { |
| 348 | q = (q << 8) + (inBuf[++inPtr] & 0xFF); |
| 349 | if (--len > 0) { |
| 350 | q = (q << 8) + (inBuf[++inPtr] & 0xFF); |
| 351 | if (--len > 0) { |
| 352 | q = (q << 8) + (inBuf[++inPtr] & 0xFF); |
| 353 | } |
| 354 | } |
| 355 | } |
| 356 | _quad1 = q; |
| 357 | return _symbols.findName(q); |
| 358 | } |
| 359 | if (len < 9) { |
| 360 | // First quadbyte is easy |
| 361 | int q1 = (inBuf[inPtr] & 0xFF) << 8; |
| 362 | q1 += (inBuf[++inPtr] & 0xFF); |
| 363 | q1 <<= 8; |
| 364 | q1 += (inBuf[++inPtr] & 0xFF); |
| 365 | q1 <<= 8; |
| 366 | q1 += (inBuf[++inPtr] & 0xFF); |
| 367 | int q2 = (inBuf[++inPtr] & 0xFF); |
| 368 | len -= 5; |
| 369 | if (len > 0) { |
| 370 | q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF); |
| 371 | if (--len > 0) { |
| 372 | q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF); |
| 373 | if (--len > 0) { |
| 374 | q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF); |
| 375 | } |
| 376 | } |
| 377 | } |
| 378 | _quad1 = q1; |
| 379 | _quad2 = q2; |
| 380 | return _symbols.findName(q1, q2); |
| 381 | } |
| 382 | return _findDecodedLonger(inBuf, inPtr, len); |
| 383 | } |
| 384 | |
| 385 | // Method for locating names longer than 8 bytes (in UTF-8) |
| 386 | private final String _findDecodedLonger(byte[] inBuf, int inPtr, int len) throws IOException |
| 387 | { |
| 388 | // first, need enough buffer to store bytes as ints: |
| 389 | { |
| 390 | int bufLen = (len + 3) >> 2; |
| 391 | if (bufLen > _quadBuffer.length) { |
| 392 | _quadBuffer = Arrays.copyOf(_quadBuffer, bufLen+4); |
| 393 | } |
| 394 | } |
| 395 | // then decode, full quads first |
| 396 | int offset = 0; |
| 397 | do { |
| 398 | int q = (inBuf[inPtr++] & 0xFF) << 8; |
| 399 | q |= inBuf[inPtr++] & 0xFF; |
| 400 | q <<= 8; |
| 401 | q |= inBuf[inPtr++] & 0xFF; |
| 402 | q <<= 8; |
| 403 | q |= inBuf[inPtr++] & 0xFF; |
| 404 | _quadBuffer[offset++] = q; |
| 405 | } while ((len -= 4) > 3); |
| 406 | // and then leftovers |
| 407 | if (len > 0) { |
| 408 | int q = inBuf[inPtr] & 0xFF; |
| 409 | if (--len > 0) { |
| 410 | q = (q << 8) + (inBuf[++inPtr] & 0xFF); |
| 411 | if (--len > 0) { |
| 412 | q = (q << 8) + (inBuf[++inPtr] & 0xFF); |
| 413 | } |
| 414 | } |
| 415 | _quadBuffer[offset++] = q; |
| 416 | } |
| 417 | return _symbols.findName(_quadBuffer, offset); |
| 418 | } |
| 419 | |
| 420 | protected final String _addDecodedToSymbols(int len, String name) |
| 421 | { |
| 422 | if (len < 5) { |
| 423 | return _symbols.addName(name, _quad1, 0); |
| 424 | } |
| 425 | if (len < 9) { |
| 426 | return _symbols.addName(name, _quad1, _quad2); |
| 427 | } |
| 428 | int qlen = (len + 3) >> 2; |
| 429 | return _symbols.addName(name, _quadBuffer, qlen); |
| 430 | } |
| 431 | |
| 432 | /* |
| 433 | /********************************************************************** |
| 434 | /* Internal methods, state changes |
| 435 | /********************************************************************** |
| 436 | */ |
| 437 | |
| 438 | /** |
| 439 | * Helper method called at point when all input has been exhausted and |
| 440 | * input feeder has indicated no more input will be forthcoming. |
| 441 | */ |
| 442 | protected final JsonToken _eofAsNextToken() throws IOException { |
| 443 | _majorState = MAJOR_CLOSED; |
| 444 | if (!_parsingContext.inRoot()) { |
| 445 | _handleEOF(); |
| 446 | } |
| 447 | close(); |
| 448 | return (_currToken = null); |
| 449 | } |
| 450 | |
| 451 | protected final JsonToken _valueComplete(JsonToken t) throws IOException |
| 452 | { |
| 453 | _majorState = _majorStateAfterValue; |
| 454 | _currToken = t; |
| 455 | return t; |
| 456 | } |
| 457 | |
| 458 | /* |
| 459 | /********************************************************************** |
| 460 | /* Internal methods, error reporting |
| 461 | /********************************************************************** |
| 462 | */ |
| 463 | |
| 464 | protected void _reportInvalidInitial(int mask) throws JsonParseException { |
| 465 | _reportError("Invalid UTF-8 start byte 0x"+Integer.toHexString(mask)); |
| 466 | } |
| 467 | |
| 468 | protected void _reportInvalidOther(int mask, int ptr) throws JsonParseException { |
| 469 | _inputPtr = ptr; |
| 470 | _reportError("Invalid UTF-8 middle byte 0x"+Integer.toHexString(mask)); |
| 471 | } |
| 472 | } |