src/main/java/com/fasterxml/jackson/core/sym/ByteQuadsCanonicalizer.java - platform/external/jackson-core - Gitiles

 package com.fasterxml.jackson.core.sym;

 import java.util.Arrays;
 import java.util.concurrent.atomic.AtomicReference;

 import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.util.InternCache;

 /**
  * Replacement for <code>BytesToNameCanonicalizer</code> which aims at more localized
  * memory access due to flattening of name quad data.
  * Performance improvement modest for simple JSON document data binding (maybe 3%),
  * but should help more for larger symbol tables, or for binary formats like Smile.
  *
  * @since 2.6
  */
 public final class ByteQuadsCanonicalizer
 {
     /**
      * Initial size of the primary hash area. Each entry consumes 4 ints (16 bytes),
      * and secondary area is same as primary; so default size will use 2kB of memory_tertiaryStart
      * (plus 64x4 or 64x8 (256/512 bytes) for references to Strings, and Strings
      * themselves).
      */
     private static final int DEFAULT_T_SIZE = 64;
 //    private static final int DEFAULT_T_SIZE = 256;

     /**
      * Let's not expand symbol tables past some maximum size;
      * this should protected against OOMEs caused by large documents
      * with unique (~= random) names.
      * Size is in
      */
     private static final int MAX_T_SIZE = 0x10000; // 64k entries == 2M mem hash area

     /**
      * No point in trying to construct tiny tables, just need to resize soon.
      */
     private final static int MIN_HASH_SIZE = 16;

     /**
      * Let's only share reasonably sized symbol tables. Max size set to 3/4 of 8k;
      * this corresponds to 256k main hash index. This should allow for enough distinct
      * names for almost any case, while preventing ballooning for cases where names
      * are unique (or close thereof).
      */
     final static int MAX_ENTRIES_FOR_REUSE = 6000;

     /*
     /**********************************************************
     /* Linkage, needed for merging symbol tables
     /**********************************************************
      */

     /**
      * Reference to the root symbol table, for child tables, so
      * that they can merge table information back as necessary.
      */
     final private ByteQuadsCanonicalizer _parent;

     /**
      * Member that is only used by the root table instance: root
      * passes immutable state info child instances, and children
      * may return new state if they add entries to the table.
      * Child tables do NOT use the reference.
      */
     final private AtomicReference<TableInfo> _tableInfo;

     /**
      * Seed value we use as the base to make hash codes non-static between
      * different runs, but still stable for lifetime of a single symbol table
      * instance.
      * This is done for security reasons, to avoid potential DoS attack via
      * hash collisions.
      */
     final private int _seed;

     /*
     /**********************************************************
     /* Configuration
     /**********************************************************
      */

     /**
      * Whether canonical symbol Strings are to be intern()ed before added
      * to the table or not.
      *<p>
      * NOTE: non-final to allow disabling intern()ing in case of excessive
      * collisions.
      */
     private boolean _intern;

     /**
      * Flag that indicates whether we should throw an exception if enough
      * hash collisions are detected (true); or just worked around (false).
      *
      * @since 2.4
      */
     private final boolean _failOnDoS;

     /*
     /**********************************************************
     /* First, main hash area info
     /**********************************************************
      */

     /**
      * Primary hash information area: consists of <code>2 * _hashSize</code>
      * entries of 16 bytes (4 ints), arranged in a cascading lookup
      * structure (details of which may be tweaked depending on expected rates
      * of collisions).
      */
     private int[] _hashArea;

     /**
      * Number of slots for primary entries within {@link #_hashArea}; which is
      * at most <code>1/8</code> of actual size of the underlying array (4-int slots,
      * primary covers only half of the area; plus, additional area for longer
      * symbols after hash area).
      */
     private int _hashSize;

     /**
      * Offset within {@link #_hashArea} where secondary entries start
      */
     private int _secondaryStart;

     /**
      * Offset within {@link #_hashArea} where tertiary entries start
      */
     private int _tertiaryStart;

     /**
      * Constant that determines size of buckets for tertiary entries:
      * <code>1 &lt;&lt; _tertiaryShift</code> is the size, and shift value
      * is also used for translating from primary offset into
      * tertiary bucket (shift right by <code>4 + _tertiaryShift</code>).
      *<p>
      * Default value is 2, for buckets of 4 slots; grows bigger with
      * bigger table sizes.
      */
     private int _tertiaryShift;

     /**
      * Total number of Strings in the symbol table; only used for child tables.
      */
     private int _count;

     /**
      * Array that contains <code>String</code> instances matching
      * entries in {@link #_hashArea}.
      * Contains nulls for unused entries. Note that this size is twice
      * that of {@link #_hashArea}
      */
     private String[] _names;

     /*
     /**********************************************************
     /* Then information on collisions etc
     /**********************************************************
      */

     /**
      * Pointer to the offset within spill-over area where there is room
      * for more spilled over entries (if any).
      * Spill over area is within fixed-size portion of {@link #_hashArea}.
      */
     private int _spilloverEnd;

     /**
      * Offset within {@link #_hashArea} that follows main slots and contains
      * quads for longer names (13 bytes or longers), and points to the
      * first available int that may be used for appending quads of the next
      * long name.
      * Note that long name area follows immediately after the fixed-size
      * main hash area ({@link #_hashArea}).
      */
     private int _longNameOffset;

     /**
      * This flag is set if, after adding a new entry, it is deemed
      * that a rehash is warranted if any more entries are to be added.
      */
     private transient boolean _needRehash;

     /*
     /**********************************************************
     /* Sharing, versioning
     /**********************************************************
      */

     // // // Which of the buffers may be shared (and are copy-on-write)?

     /**
      * Flag that indicates whether underlying data structures for
      * the main hash area are shared or not. If they are, then they
      * need to be handled in copy-on-write way, i.e. if they need
      * to be modified, a copy needs to be made first; at this point
      * it will not be shared any more, and can be modified.
      *<p>
      * This flag needs to be checked both when adding new main entries,
      * and when adding new collision list queues (i.e. creating a new
      * collision list head entry)
      */
     private boolean _hashShared;

     /*
     /**********************************************************
     /* Life-cycle: constructors
     /**********************************************************
      */

     /**
      * Constructor used for creating per-<code>JsonFactory</code> "root"
      * symbol tables: ones used for merging and sharing common symbols
      *
      * @param sz Initial primary hash area size
      * @param intern Whether Strings contained should be {@link String#intern}ed
      * @param seed Random seed valued used to make it more difficult to cause
      *   collisions (used for collision-based DoS attacks).
      */
     private ByteQuadsCanonicalizer(int sz, boolean intern, int seed, boolean failOnDoS) {
         _parent = null;
         _seed = seed;
         _intern = intern;
         _failOnDoS = failOnDoS;
         // Sanity check: let's now allow hash sizes below certain minimum value
         if (sz < MIN_HASH_SIZE) {
             sz = MIN_HASH_SIZE;
         } else {
             // Also; size must be 2^N; otherwise hash algorithm won't
             // work... so let's just pad it up, if so
             if ((sz & (sz - 1)) != 0) { // only true if it's 2^N
                 int curr = MIN_HASH_SIZE;
                 while (curr < sz) {
                     curr += curr;
                 }
                 sz = curr;
             }
         }
         _tableInfo = new AtomicReference<TableInfo>(TableInfo.createInitial(sz));
     }

     /**
      * Constructor used when creating a child instance
      */
     private ByteQuadsCanonicalizer(ByteQuadsCanonicalizer parent, boolean intern,
             int seed, boolean failOnDoS, TableInfo state)
     {
         _parent = parent;
         _seed = seed;
         _intern = intern;
         _failOnDoS = failOnDoS;
         _tableInfo = null; // not used by child tables

         // Then copy shared state
         _count = state.count;
         _hashSize = state.size;
         _secondaryStart = _hashSize << 2; // right after primary area
         _tertiaryStart = _secondaryStart + (_secondaryStart >> 1); // right after secondary
         _tertiaryShift = state.tertiaryShift;

         _hashArea = state.mainHash;
         _names = state.names;

         _spilloverEnd = state.spilloverEnd;
         _longNameOffset = state.longNameOffset;

         // and then set other state to reflect sharing status
         _needRehash = false;
         _hashShared = true;
     }

     /*
     /**********************************************************
     /* Life-cycle: factory methods, merging
     /**********************************************************
      */

     /**
      * Factory method to call to create a symbol table instance with a
      * randomized seed value.
      */
     public static ByteQuadsCanonicalizer createRoot() {
         // Need to use a variable seed, to thwart hash-collision based attacks.
         // 14-Feb-2017, tatu: Does this actually help?
         long now = System.currentTimeMillis();
         // ensure it's not 0; and might as well require to be odd so:
         int seed = (((int) now) + ((int) (now >>> 32))) | 1;
         return createRoot(seed);
     }

     // Factory method that should only be called from unit tests, where seed
     // value should remain the same.
     protected static ByteQuadsCanonicalizer createRoot(int seed) {
         return new ByteQuadsCanonicalizer(DEFAULT_T_SIZE, true, seed, true);
     }

     /**
      * Factory method used to create actual symbol table instance to
      * use for parsing.
      */
     public ByteQuadsCanonicalizer makeChild(int flags) {
         return new ByteQuadsCanonicalizer(this,
                 JsonFactory.Feature.INTERN_FIELD_NAMES.enabledIn(flags),
                 _seed,
                 JsonFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW.enabledIn(flags),
                 _tableInfo.get());
     }

     /**
      * Method called by the using code to indicate it is done with this instance.
      * This lets instance merge accumulated changes into parent (if need be),
      * safely and efficiently, and without calling code having to know about parent
      * information.
      */
     public void release()
     {
         // we will try to merge if child table has new entries
         if (_parent != null && maybeDirty()) {
             _parent.mergeChild(new TableInfo(this));
             // Let's also mark this instance as dirty, so that just in
             // case release was too early, there's no corruption of possibly shared data.
             _hashShared = true;
         }
     }

     private void mergeChild(TableInfo childState)
     {
         final int childCount = childState.count;
         TableInfo currState = _tableInfo.get();

         // Should usually grow; but occasionally could also shrink if (but only if)
         // collision list overflow ends up clearing some collision lists.
         if (childCount == currState.count) {
             return;
         }

         // One caveat: let's try to avoid problems with degenerate cases of documents with
         // generated "random" names: for these, symbol tables would bloat indefinitely.
         // One way to do this is to just purge tables if they grow
         // too large, and that's what we'll do here.
         if (childCount > MAX_ENTRIES_FOR_REUSE) {
             // At any rate, need to clean up the tables
             childState = TableInfo.createInitial(DEFAULT_T_SIZE);
         }
         _tableInfo.compareAndSet(currState, childState);
     }

     /*
     /**********************************************************
     /* API, accessors
     /**********************************************************
      */

     public int size()
     {
         if (_tableInfo != null) { // root table
             return _tableInfo.get().count;
         }
         // nope, child table
         return _count;
     }

     /**
      * Returns number of primary slots table has currently
      */
     public int bucketCount() { return _hashSize; }

     /**
      * Method called to check to quickly see if a child symbol table
      * may have gotten additional entries. Used for checking to see
      * if a child table should be merged into shared table.
      */
     public boolean maybeDirty() { return !_hashShared; }

     public int hashSeed() { return _seed; }

     /**
      * Method mostly needed by unit tests; calculates number of
      * entries that are in the primary slot set. These are
      * "perfect" entries, accessible with a single lookup
      */
     public int primaryCount()
     {
         int count = 0;
         for (int offset = 3, end = _secondaryStart; offset < end; offset += 4) {
             if (_hashArea[offset] != 0) {
                 ++count;
             }
         }
         return count;
     }

     /**
      * Method mostly needed by unit tests; calculates number of entries
      * in secondary buckets
      */
     public int secondaryCount() {
         int count = 0;
         int offset = _secondaryStart + 3;
         for (int end = _tertiaryStart; offset < end; offset += 4) {
             if (_hashArea[offset] != 0) {
                 ++count;
             }
         }
         return count;
     }

     /**
      * Method mostly needed by unit tests; calculates number of entries
      * in tertiary buckets
      */
     public int tertiaryCount() {
         int count = 0;
         int offset = _tertiaryStart + 3; // to 1.5x, starting point of tertiary
         for (int end = offset + _hashSize; offset < end; offset += 4) {
             if (_hashArea[offset] != 0) {
                 ++count;
             }
         }
         return count;
     }

     /**
      * Method mostly needed by unit tests; calculates number of entries
      * in shared spillover area
      */
     public int spilloverCount() {
         // difference between spillover end, start, divided by 4 (four ints per slot)
         return (_spilloverEnd - _spilloverStart()) >> 2;
     }

     public int totalCount()
     {
         int count = 0;
         for (int offset = 3, end = (_hashSize << 3); offset < end; offset += 4) {
             if (_hashArea[offset] != 0) {
                 ++count;
             }
         }
         return count;
     }

     @Override
     public String toString() {
         int pri = primaryCount();
         int sec = secondaryCount();
         int tert = tertiaryCount();
         int spill = spilloverCount();
         int total = totalCount();
         return String.format("[%s: size=%d, hashSize=%d, %d/%d/%d/%d pri/sec/ter/spill (=%s), total:%d]",
                 getClass().getName(), _count, _hashSize,
                 pri, sec, tert, spill, (pri+sec+tert+spill), total);
     }

     /*
     /**********************************************************
     /* Public API, accessing symbols
     /**********************************************************
      */

     public String findName(int q1)
     {
         int offset = _calcOffset(calcHash(q1));
         // first: primary match?
         final int[] hashArea = _hashArea;

         int len = hashArea[offset+3];

         if (len == 1) {
             if (hashArea[offset] == q1) {
                 return _names[offset >> 2];
             }
         } else if (len == 0) { // empty slot; unlikely but avoid further lookups if so
             return null;
         }
         // secondary? single slot shared by N/2 primaries
         int offset2 = _secondaryStart + ((offset >> 3) << 2);

         len = hashArea[offset2+3];

         if (len == 1) {
             if (hashArea[offset2] == q1) {
                 return _names[offset2 >> 2];
             }
         } else if (len == 0) { // empty slot; unlikely but avoid further lookups if so
             return null;
         }

         // tertiary lookup & spillovers best to offline
         return _findSecondary(offset, q1);
     }

     public String findName(int q1, int q2)
     {
         int offset = _calcOffset(calcHash(q1, q2));

         final int[] hashArea = _hashArea;

         int len = hashArea[offset+3];

         if (len == 2) {
             if ((q1 == hashArea[offset]) && (q2 == hashArea[offset+1])) {
                 return _names[offset >> 2];
             }
         } else if (len == 0) { // empty slot; unlikely but avoid further lookups if so
             return null;
         }
         // secondary?
         int offset2 = _secondaryStart + ((offset >> 3) << 2);

         len = hashArea[offset2+3];

         if (len == 2) {
             if ((q1 == hashArea[offset2]) && (q2 == hashArea[offset2+1])) {
                 return _names[offset2 >> 2];
             }
         } else if (len == 0) { // empty slot? Short-circuit if no more spillovers
             return null;
         }
         return _findSecondary(offset, q1, q2);
     }

     public String findName(int q1, int q2, int q3)
     {
         int offset = _calcOffset(calcHash(q1, q2, q3));
         final int[] hashArea = _hashArea;
         int len = hashArea[offset+3];

         if (len == 3) {
             if ((q1 == hashArea[offset]) && (hashArea[offset+1] == q2) && (hashArea[offset+2] == q3)) {
                 return _names[offset >> 2];
             }
         } else if (len == 0) { // empty slot; unlikely but avoid further lookups if so
             return null;
         }
         // secondary?
         int offset2 = _secondaryStart + ((offset >> 3) << 2);

         len = hashArea[offset2+3];

         if (len == 3) {
             if ((q1 == hashArea[offset2]) && (hashArea[offset2+1] == q2) && (hashArea[offset2+2] == q3)) {
                 return _names[offset2 >> 2];
             }
         } else if (len == 0) { // empty slot? Short-circuit if no more spillovers
             return null;
         }
         return _findSecondary(offset, q1, q2, q3);
     }

     public String findName(int[] q, int qlen)
     {
         /* This version differs significantly, because longer names do not fit within cell.
          * Rather, they contain hash in main slot, and offset+length to extension area
          * that contains actual quads.
          */
         if (qlen < 4) { // another sanity check
             switch (qlen) {
             case 3:
                 return findName(q[0], q[1], q[2]);
             case 2:
                 return findName(q[0], q[1]);
             case 1:
                 return findName(q[0]);
             default: // if 0 ever passed
                 return "";
             }
         }
         final int hash = calcHash(q, qlen);
         int offset = _calcOffset(hash);

         final int[] hashArea = _hashArea;

         final int len = hashArea[offset+3];

         if ((hash == hashArea[offset]) && (len == qlen)) {
             // probable but not guaranteed: verify
             if (_verifyLongName(q, qlen, hashArea[offset+1])) {
                 return _names[offset >> 2];
             }
         }
         if (len == 0) { // empty slot; unlikely but avoid further lookups if so
             return null;
         }
         // secondary?
         int offset2 = _secondaryStart + ((offset >> 3) << 2);

         final int len2 = hashArea[offset2+3];
         if ((hash == hashArea[offset2]) && (len2 == qlen)) {
             if (_verifyLongName(q, qlen, hashArea[offset2+1])) {
                 return _names[offset2 >> 2];
             }
         }
         return _findSecondary(offset, hash, q, qlen);
     }

     private final int _calcOffset(int hash)
     {
         // NOTE: simple for initial impl, but we may want to interleave it a bit
         // in near future
         // So: first, hash into primary hash index
         int ix = hash & (_hashSize-1);
         // keeping in mind we have 4 ints per entry
         return (ix << 2);
     }

     /*
     /**********************************************************
     /* Access from spill-over areas
     /**********************************************************
      */

     private String _findSecondary(int origOffset, int q1)
     {
         // tertiary area division is dynamic. First; its size is N/4 compared to
         // primary hash size; and offsets are for 4 int slots. So to get to logical
         // index would shift by 4. But! Tertiary area is further split into buckets,
         // determined by shift value. And finally, from bucket back into physical offsets
         int offset = _tertiaryStart + ((origOffset >> (_tertiaryShift + 2)) << _tertiaryShift);
         final int[] hashArea = _hashArea;
         final int bucketSize = (1 << _tertiaryShift);
         for (int end = offset + bucketSize; offset < end; offset += 4) {
             int len = hashArea[offset+3];
             if ((q1 == hashArea[offset]) && (1 == len)) {
                 return _names[offset >> 2];
             }
             if (len == 0) {
                 return null;
             }
         }
         // but if tertiary full, check out spill-over area as last resort
         // shared spillover starts at 7/8 of the main hash area
         // (which is sized at 2 * _hashSize), so:
         for (offset = _spilloverStart(); offset < _spilloverEnd; offset += 4) {
             if ((q1 == hashArea[offset]) && (1 == hashArea[offset+3])) {
                 return _names[offset >> 2];
             }
         }
         return null;
     }

     private String _findSecondary(int origOffset, int q1, int q2)
     {
         int offset = _tertiaryStart + ((origOffset >> (_tertiaryShift + 2)) << _tertiaryShift);
         final int[] hashArea = _hashArea;

         final int bucketSize = (1 << _tertiaryShift);
         for (int end = offset + bucketSize; offset < end; offset += 4) {
             int len = hashArea[offset+3];
             if ((q1 == hashArea[offset]) && (q2 == hashArea[offset+1]) && (2 == len)) {
                 return _names[offset >> 2];
             }
             if (len == 0) {
                 return null;
             }
         }
         for (offset = _spilloverStart(); offset < _spilloverEnd; offset += 4) {
             if ((q1 == hashArea[offset]) && (q2 == hashArea[offset+1]) && (2 == hashArea[offset+3])) {
                 return _names[offset >> 2];
             }
         }
         return null;
     }

     private String _findSecondary(int origOffset, int q1, int q2, int q3)
     {
         int offset = _tertiaryStart + ((origOffset >> (_tertiaryShift + 2)) << _tertiaryShift);
         final int[] hashArea = _hashArea;

         final int bucketSize = (1 << _tertiaryShift);
         for (int end = offset + bucketSize; offset < end; offset += 4) {
             int len = hashArea[offset+3];
             if ((q1 == hashArea[offset]) && (q2 == hashArea[offset+1]) && (q3 == hashArea[offset+2]) && (3 == len)) {
                 return _names[offset >> 2];
             }
             if (len == 0) {
                 return null;
             }
         }
         for (offset = _spilloverStart(); offset < _spilloverEnd; offset += 4) {
             if ((q1 == hashArea[offset]) && (q2 == hashArea[offset+1]) && (q3 == hashArea[offset+2])
                     && (3 == hashArea[offset+3])) {
                 return _names[offset >> 2];
             }
         }
         return null;
     }

     private String _findSecondary(int origOffset, int hash, int[] q, int qlen)
     {
         int offset = _tertiaryStart + ((origOffset >> (_tertiaryShift + 2)) << _tertiaryShift);
         final int[] hashArea = _hashArea;

         final int bucketSize = (1 << _tertiaryShift);
         for (int end = offset + bucketSize; offset < end; offset += 4) {
             int len = hashArea[offset+3];
             if ((hash == hashArea[offset]) && (qlen == len)) {
                 if (_verifyLongName(q, qlen, hashArea[offset+1])) {
                     return _names[offset >> 2];
                 }
             }
             if (len == 0) {
                 return null;
             }
         }
         for (offset = _spilloverStart(); offset < _spilloverEnd; offset += 4) {
             if ((hash == hashArea[offset]) && (qlen == hashArea[offset+3])) {
                 if (_verifyLongName(q, qlen, hashArea[offset+1])) {
                     return _names[offset >> 2];
                 }
             }
         }
         return null;
     }

     private boolean _verifyLongName(int[] q, int qlen, int spillOffset)
     {
         final int[] hashArea = _hashArea;
         // spillOffset assumed to be physical index right into quad string
         int ix = 0;

         switch (qlen) {
         default:
             return _verifyLongName2(q, qlen, spillOffset);
         case 8:
             if (q[ix++] != hashArea[spillOffset++]) return false;
         case 7:
             if (q[ix++] != hashArea[spillOffset++]) return false;
         case 6:
             if (q[ix++] != hashArea[spillOffset++]) return false;
         case 5:
             if (q[ix++] != hashArea[spillOffset++]) return false;
         case 4: // always at least 4
             if (q[ix++] != hashArea[spillOffset++]) return false;
             if (q[ix++] != hashArea[spillOffset++]) return false;
             if (q[ix++] != hashArea[spillOffset++]) return false;
             if (q[ix++] != hashArea[spillOffset++]) return false;
         }
         return true;
     }

     private boolean _verifyLongName2(int[] q, int qlen, int spillOffset)
     {
         int ix = 0;
         do {
             if (q[ix++] != _hashArea[spillOffset++]) {
                 return false;
             }
         } while (ix < qlen);
         return true;
     }

     /*
     /**********************************************************
     /* API, mutators
     /**********************************************************
      */

     public String addName(String name, int q1) {
         _verifySharing();
         if (_intern) {
             name = InternCache.instance.intern(name);
         }
         int offset = _findOffsetForAdd(calcHash(q1));
         _hashArea[offset] = q1;
         _hashArea[offset+3] = 1;
         _names[offset >> 2] = name;
         ++_count;
         _verifyNeedForRehash();
         return name;
     }

     public String addName(String name, int q1, int q2) {
         _verifySharing();
         if (_intern) {
             name = InternCache.instance.intern(name);
         }
         int hash = (q2 == 0) ? calcHash(q1) : calcHash(q1, q2);
         int offset = _findOffsetForAdd(hash);
         _hashArea[offset] = q1;
         _hashArea[offset+1] = q2;
         _hashArea[offset+3] = 2;
         _names[offset >> 2] = name;
         ++_count;
         _verifyNeedForRehash();
         return name;
     }

     public String addName(String name, int q1, int q2, int q3) {
         _verifySharing();
         if (_intern) {
             name = InternCache.instance.intern(name);
         }
         int offset = _findOffsetForAdd(calcHash(q1, q2, q3));
         _hashArea[offset] = q1;
         _hashArea[offset+1] = q2;
         _hashArea[offset+2] = q3;
         _hashArea[offset+3] = 3;
         _names[offset >> 2] = name;
         ++_count;
         _verifyNeedForRehash();
         return name;
     }

     public String addName(String name, int[] q, int qlen)
     {
         _verifySharing();
         if (_intern) {
             name = InternCache.instance.intern(name);
         }
         int offset;

         switch (qlen) {
         case 1:
         {
                 offset = _findOffsetForAdd(calcHash(q[0]));
                 _hashArea[offset] = q[0];
                 _hashArea[offset+3] = 1;
             }
             break;
         case 2:
             {
                 offset = _findOffsetForAdd(calcHash(q[0], q[1]));
                 _hashArea[offset] = q[0];
                 _hashArea[offset+1] = q[1];
                 _hashArea[offset+3] = 2;
             }
             break;
         case 3:
             {
                 offset = _findOffsetForAdd(calcHash(q[0], q[1], q[2]));
                 _hashArea[offset] = q[0];
                 _hashArea[offset+1] = q[1];
                 _hashArea[offset+2] = q[2];
                 _hashArea[offset+3] = 3;
             }
             break;
         default:
             final int hash = calcHash(q, qlen);
             offset = _findOffsetForAdd(hash);

             _hashArea[offset] = hash;
             int longStart = _appendLongName(q, qlen);
             _hashArea[offset+1] = longStart;
             _hashArea[offset+3] = qlen;
         }
         // plus add the actual String
         _names[offset >> 2] = name;

         // and finally; see if we really should rehash.
         ++_count;
         _verifyNeedForRehash();
         return name;
     }

     private void _verifyNeedForRehash() {
         // Yes if above 80%, or above 50% AND have ~1% spill-overs
         if (_count > (_hashSize >> 1)) { // over 50%
             int spillCount = (_spilloverEnd - _spilloverStart()) >> 2;
             if ((spillCount > (1 + _count >> 7))
                     || (_count > (_hashSize * 0.80))) {
                 _needRehash = true;
             }
         }
     }

     private void _verifySharing()
     {
         if (_hashShared) {
             _hashArea = Arrays.copyOf(_hashArea, _hashArea.length);
             _names = Arrays.copyOf(_names, _names.length);
             _hashShared = false;
             // 09-Sep-2015, tatu: As per [jackson-core#216], also need to ensure
             //    we rehash as needed, as need-rehash flag is not copied from parent
             _verifyNeedForRehash();
         }
         if (_needRehash) {
             rehash();
         }
     }

     /**
      * Method called to find the location within hash table to add a new symbol in.
      */
     private int _findOffsetForAdd(int hash)
     {
         // first, check the primary:
         int offset = _calcOffset(hash);
         final int[] hashArea = _hashArea;
         if (hashArea[offset+3] == 0) {
 //System.err.printf(" PRImary slot #%d, hash %X\n", (offset>>2), hash & 0x7F);
             return offset;
         }
         // then secondary
         int offset2 = _secondaryStart + ((offset >> 3) << 2);
         if (hashArea[offset2+3] == 0) {
 //System.err.printf(" SECondary slot #%d (start x%X), hash %X\n",(offset >> 3), _secondaryStart, (hash & 0x7F));
             return offset2;
         }
         // if not, tertiary?

         offset2 = _tertiaryStart + ((offset >> (_tertiaryShift + 2)) << _tertiaryShift);
         final int bucketSize = (1 << _tertiaryShift);
         for (int end = offset2 + bucketSize; offset2 < end; offset2 += 4) {
             if (hashArea[offset2+3] == 0) {
 //System.err.printf(" TERtiary slot x%X (from x%X, start x%X), hash %X.\n", offset2, ((offset >> (_tertiaryShift + 2)) << _tertiaryShift), _tertiaryStart, (hash & 0x7F));
                 return offset2;
             }
         }

         // and if even tertiary full, append at the end of spill area
         offset = _spilloverEnd;
         _spilloverEnd += 4;

 //System.err.printf(" SPIll-over at x%X; start x%X; end x%X, hash %X\n", offset, _spilloverStart(), _hashArea.length, (hash & 0x7F));

         // one caveat: in the unlikely event if spill-over filling up,
         // check if that could be considered a DoS attack; handle appropriately
         // (NOTE: approximate for now; we could verify details if that becomes necessary)
         /* 31-Jul-2015, tatu: Note that spillover area does NOT end at end of array,
          *   since "long names" area follows. Instead, need to calculate from hash size.
          */
         final int end = (_hashSize << 3);
         if (_spilloverEnd >= end) {
             if (_failOnDoS) {
                 _reportTooManyCollisions();
             }
             // and if we didn't fail, we'll simply force rehash for next add
             // (which, in turn, may double up or nuke contents, depending on size etc)
             _needRehash = true;
         }
         return offset;
     }

     private int _appendLongName(int[] quads, int qlen)
     {
         int start = _longNameOffset;

         // note: at this point we must already be shared. But may not have enough space
         if ((start + qlen) > _hashArea.length) {
             // try to increment in reasonable chunks; at least space that we need
             int toAdd = (start + qlen) - _hashArea.length;
             // but at least 1/8 of regular hash area size or 16kB (whichever smaller)
             int minAdd = Math.min(4096, _hashSize);

             int newSize = _hashArea.length + Math.max(toAdd, minAdd);
             _hashArea = Arrays.copyOf(_hashArea, newSize);
         }
         System.arraycopy(quads, 0, _hashArea, start, qlen);
         _longNameOffset += qlen;
         return start;
     }

     /*
     /**********************************************************
     /* Hash calculation
     /**********************************************************
      */

     /* Note on hash calculation: we try to make it more difficult to
      * generate collisions automatically; part of this is to avoid
      * simple "multiply-add" algorithm (like JDK String.hashCode()),
      * and add bit of shifting. And other part is to make this
      * non-linear, at least for shorter symbols.
      */

     // JDK uses 31; other fine choices are 33 and 65599, let's use 33
     // as it seems to give fewest collisions for us
     // (see [http://www.cse.yorku.ca/~oz/hash.html] for details)
     private final static int MULT = 33;
     private final static int MULT2 = 65599;
     private final static int MULT3 = 31;

     public int calcHash(int q1)
     {
         int hash = q1 ^ _seed;
         /* 29-Mar-2015, tatu: Earlier used 15 + 9 right shifts, which worked ok
          *    except for one specific problem case: numbers. So needed to make sure
          *    that all 4 least-significant bits participate in hash. Couple of ways
          *    to work it out, but this is the simplest, fast and seems to do ok.
          */
         hash += (hash >>> 16); // to xor hi- and low- 16-bits
         hash ^= (hash << 3); // shuffle back a bit
         hash += (hash >>> 12); // and bit more
         return hash;
     }

     public int calcHash(int q1, int q2)
     {
         // For two quads, let's change algorithm a bit, to spice
         // things up (can do bit more processing anyway)
         int hash = q1;

         hash += (hash >>> 15); // try mixing first and second byte pairs first
         hash ^= (hash >>> 9); // as well as lowest 2 bytes
         hash += (q2 * MULT); // then add second quad
         hash ^= _seed;
         hash += (hash >>> 16); // and shuffle some more
         hash ^= (hash >>> 4);
         hash += (hash << 3);

         return hash;
     }

     public int calcHash(int q1, int q2, int q3)
     { // use same algorithm as multi-byte, tested to work well
         int hash = q1 ^ _seed;
         hash += (hash >>> 9);
         hash *= MULT3;
         hash += q2;
         hash *= MULT;
         hash += (hash >>> 15);
         hash ^= q3;
         // 26-Mar-2015, tatu: As per two-quad case, a short shift seems to help more here
         hash += (hash >>> 4);

         hash += (hash >>> 15);
         hash ^= (hash << 9);

         return hash;
     }

     public int calcHash(int[] q, int qlen)
     {
         if (qlen < 4) {
             throw new IllegalArgumentException();
         }
         /* And then change handling again for "multi-quad" case; mostly
          * to make calculation of collisions less fun. For example,
          * add seed bit later in the game, and switch plus/xor around,
          * use different shift lengths.
          */
         int hash = q[0] ^ _seed;
         hash += (hash >>> 9);
         hash += q[1];
         hash += (hash >>> 15);
         hash *= MULT;
         hash ^= q[2];
         hash += (hash >>> 4);

         for (int i = 3; i < qlen; ++i) {
             int next = q[i];
             next = next ^ (next >> 21);
             hash += next;
         }
         hash *= MULT2;

         // and finally shuffle some more once done
         hash += (hash >>> 19);
         hash ^= (hash << 5);
         return hash;
     }

     /*
     /**********************************************************
     /* Rehashing
     /**********************************************************
      */

     private void rehash()
     {
         _needRehash = false;
         // Note: since we'll make copies, no need to unshare, can just mark as such:
         _hashShared = false;

         // And then we can first deal with the main hash area. Since we are expanding
         // linearly (double up), we know there'll be no collisions during this phase.
         final int[] oldHashArea = _hashArea;
         final String[] oldNames = _names;
         final int oldSize = _hashSize;
         final int oldCount = _count;
         final int newSize = oldSize + oldSize;
         final int oldEnd = _spilloverEnd;

         /* 13-Mar-2010, tatu: Let's guard against OOME that could be caused by
          *    large documents with unique (or mostly so) names
          */
         if (newSize > MAX_T_SIZE) {
             nukeSymbols(true);
             return;
         }
         // double up main hash area, but do not expand long-name area:
         _hashArea = new int[oldHashArea.length + (oldSize<<3)];
         _hashSize = newSize;
         _secondaryStart = (newSize << 2); // 4 ints per entry
         _tertiaryStart = _secondaryStart + (_secondaryStart >> 1); // right after secondary
         _tertiaryShift = _calcTertiaryShift(newSize);

         // and simply double up name array
         _names = new String[oldNames.length << 1];
         nukeSymbols(false);

         // Plus we can scan only through the primary hash area, looking for non-empty
         // slots, without worrying about ordering. This should never reduce priority
         // of existing entries: primaries remain primaries; however, due to increased
         // space, secondaries may become primaries etc

         int copyCount = 0;
         int[] q = new int[16];
         for (int offset = 0, end = oldEnd; offset < end; offset += 4) {
             int len = oldHashArea[offset+3];
             if (len == 0) { // empty slot, skip
                 continue;
             }
             ++copyCount;
             String name = oldNames[offset>>2];
             switch (len) {
             case 1:
                 q[0] = oldHashArea[offset];
                 addName(name, q, 1);
                 break;
             case 2:
                 q[0] = oldHashArea[offset];
                 q[1] = oldHashArea[offset+1];
                 addName(name, q, 2);
                 break;
             case 3:
                 q[0] = oldHashArea[offset];
                 q[1] = oldHashArea[offset+1];
                 q[2] = oldHashArea[offset+2];
                 addName(name, q, 3);
                 break;
             default:
                 if (len > q.length) {
                     q = new int[len];
                 }
                 // #0 is hash, #1 offset
                 int qoff = oldHashArea[offset+1];
                 System.arraycopy(oldHashArea, qoff, q, 0, len);
                 addName(name, q, len);
                 break;
             }
         }

         // Sanity checks: since corruption difficult to detect, assert explicitly
         // with production code
         if (copyCount != oldCount) {
             throw new IllegalStateException("Failed rehash(): old count="+oldCount+", copyCount="+copyCount);
         }
     }

     /**
      * Helper method called to empty all shared symbols, but to leave
      * arrays allocated
      */
     private void nukeSymbols(boolean fill) {
         _count = 0;
         // reset spill-over to empty (starting at 7/8 of hash area)
         _spilloverEnd = _spilloverStart();
         // and long name area to empty, starting immediately after hash area
         _longNameOffset = _hashSize << 3;
         if (fill) {
             Arrays.fill(_hashArea, 0);
             Arrays.fill(_names, null);
         }
     }

     /*
     /**********************************************************
     /* Helper methods
     /**********************************************************
      */

     /**
      * Helper method that calculates start of the spillover area
      */
     private final int _spilloverStart() {
         // we'll need slot at 1.75x of hashSize, but with 4-ints per slot.
         // So basically multiply by 7
         int offset = _hashSize;
         return (offset << 3) - offset;
     }

     protected void _reportTooManyCollisions()
     {
         // First: do not fuzz about small symbol tables; may get balanced by doubling up
         if (_hashSize <= 1024) { // would have spill-over area of 128 entries
             return;
         }
         throw new IllegalStateException("Spill-over slots in symbol table with "+_count
                 +" entries, hash area of "+_hashSize+" slots is now full (all "
                 +(_hashSize >> 3)+" slots -- suspect a DoS attack based on hash collisions."
                 +" You can disable the check via `JsonFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW`");
     }

     static int _calcTertiaryShift(int primarySlots)
     {
         // first: we only get 1/4 of slots of primary, to divide
         int tertSlots = (primarySlots) >> 2;

         // default is for buckets of 4 slots (each 4 ints, i.e. 1 << 4)
         if (tertSlots < 64) {
             return 4;
         }
         if (tertSlots <= 256) { // buckets of 8 slots (up to 256 == 32 x 8)
             return 5;
         }
         if (tertSlots <= 1024) { // buckets of 16 slots (up to 1024 == 64 x 16)
             return 6;
         }
         // and biggest buckets have 32 slots
         return 7;
     }

     /*
     /**********************************************************
     /* Helper classes
     /**********************************************************
      */

     /**
      * Immutable value class used for sharing information as efficiently
      * as possible, by only require synchronization of reference manipulation
      * but not access to contents.
      *
      * @since 2.1
      */
     private final static class TableInfo
     {
         public final int size;
         public final int count;
         public final int tertiaryShift;
         public final int[] mainHash;
         public final String[] names;
         public final int spilloverEnd;
         public final int longNameOffset;

         public TableInfo(int size, int count, int tertiaryShift,
                 int[] mainHash, String[] names, int spilloverEnd, int longNameOffset)
         {
             this.size = size;
             this.count = count;
             this.tertiaryShift = tertiaryShift;
             this.mainHash = mainHash;
             this.names = names;
             this.spilloverEnd = spilloverEnd;
             this.longNameOffset = longNameOffset;
         }

         public TableInfo(ByteQuadsCanonicalizer src)
         {
             size = src._hashSize;
             count = src._count;
             tertiaryShift = src._tertiaryShift;
             mainHash = src._hashArea;
             names = src._names;
             spilloverEnd = src._spilloverEnd;
             longNameOffset = src._longNameOffset;
         }

         public static TableInfo createInitial(int sz) {
             int hashAreaSize = sz << 3;
             int tertShift = _calcTertiaryShift(sz);

             return new TableInfo(sz, // hashSize
                     0, // count
                     tertShift,
                     new int[hashAreaSize], // mainHash, 2x slots, 4 ints per slot
                     new String[sz << 1], // names == 2x slots
                     hashAreaSize - sz, // at 7/8 of the total area
                     hashAreaSize // longNameOffset, immediately after main hashes
             );
         }
     }
 }