Add size class computation capability.

Add size class computation capability, currently used only as validation
of the size class lookup tables.  Generalize the size class spacing used
for bins, for eventual use throughout the full range of allocation
sizes.
diff --git a/configure.ac b/configure.ac
index 58f6289..5852249 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1201,6 +1201,29 @@
 fi
 
 dnl ============================================================================
+dnl Check for __builtin_clz() and __builtin_clzl().
+
+AC_CACHE_CHECK([for __builtin_clz],
+               [je_cv_builtin_clz],
+               [AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+                                                [
+                                                {
+                                                        unsigned x = 0;
+                                                        int y = __builtin_clz(x);
+                                                }
+                                                {
+                                                        unsigned long x = 0;
+                                                        int y = __builtin_clzl(x);
+                                                }
+                                                ])],
+                               [je_cv_builtin_clz=yes],
+                               [je_cv_builtin_clz=no])])
+
+if test "x${je_cv_builtin_clz}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ])
+fi
+
+dnl ============================================================================
 dnl Check for spinlock(3) operations as provided on Darwin.
 
 JE_COMPILABLE([Darwin OSSpin*()], [
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 598a89b..2dc9501 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -463,8 +463,15 @@
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
+size_t	small_size2bin_compute(size_t size);
+size_t	small_size2bin_lookup(size_t size);
 size_t	small_size2bin(size_t size);
+size_t	small_bin2size_compute(size_t binind);
+size_t	small_bin2size_lookup(size_t binind);
 size_t	small_bin2size(size_t binind);
+size_t	small_s2u_compute(size_t size);
+size_t	small_s2u_lookup(size_t size);
+size_t	small_s2u(size_t size);
 arena_chunk_map_t	*arena_mapp_get(arena_chunk_t *chunk, size_t pageind);
 size_t	*arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
 size_t	arena_mapbitsp_read(size_t *mapbitsp);
@@ -507,18 +514,144 @@
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
 #  ifdef JEMALLOC_ARENA_INLINE_A
+JEMALLOC_INLINE size_t
+small_size2bin_compute(size_t size)
+{
+#if (NTBINS > 0)
+	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
+		size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
+		size_t lg_ceil = lg_floor(pow2_ceil(size));
+		return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin);
+	} else
+#endif
+	{
+		size_t x = lg_floor((size<<1)-1); /* ceil(lg(size)) */
+		size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
+		    x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
+		size_t grp = shift << LG_SIZE_CLASS_GROUP;
+		/* lg of the class spacing; never below the quantum. */
+		size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
+		    ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
+		/* Unsigned mask: left-shifting a negative value is undefined. */
+		size_t delta_inverse_mask = ZU(-1) << lg_delta;
+		size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) &
+		    ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+		/* Index = tiny bins + whole groups + offset within the group. */
+		size_t bin = NTBINS + grp + mod;
+		return (bin);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+small_size2bin_lookup(size_t size)
+{
+	/* Table lookup; the assert cross-checks small_size2bin_compute(). */
+	assert(size <= LOOKUP_MAXCLASS);
+	{
+		size_t ret = ((size_t)(small_size2bin_tab[(size-1) >>
+		    LG_TINY_MIN]));
+		assert(ret == small_size2bin_compute(size));
+		return (ret);
+	}
+}
+
 JEMALLOC_ALWAYS_INLINE size_t
 small_size2bin(size_t size)
 {
 
-	return ((size_t)(small_size2bin_tab[(size-1) >> LG_TINY_MIN]));
+	assert(size > 0);	/* The lookup path indexes with size-1. */
+	if (size <= LOOKUP_MAXCLASS)
+		return (small_size2bin_lookup(size));
+	else
+		return (small_size2bin_compute(size));
+}
+
+JEMALLOC_INLINE size_t
+small_bin2size_compute(size_t binind)
+{
+#if (NTBINS > 0)
+	if (binind < NTBINS)
+		return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + binind));
+	else
+#endif
+	{
+		size_t reduced_binind = binind - NTBINS;
+		size_t grp = reduced_binind >> LG_SIZE_CLASS_GROUP;
+		size_t mod = reduced_binind & ((ZU(1) << LG_SIZE_CLASS_GROUP) -
+		    1);
+		/* Mask is zero for grp == 0 and all ones otherwise. */
+		size_t grp_size_mask = ~((!!grp)-1);
+		size_t grp_size = ((ZU(1) << (LG_QUANTUM +
+		    (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
+		/* Group 0 uses the same shift (and hence delta) as group 1. */
+		size_t shift = (grp == 0) ? 1 : grp;
+		size_t lg_delta = shift + (LG_QUANTUM-1);
+		size_t mod_size = (mod+1) << lg_delta;
+		/* Size = masked group base + (mod+1) deltas. */
+		size_t usize = grp_size + mod_size;
+		return (usize);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+small_bin2size_lookup(size_t binind)
+{
+	/* Table lookup; the assert cross-checks small_bin2size_compute(). */
+	assert(binind < NBINS);
+	{
+		size_t ret = ((size_t)(small_bin2size_tab[binind]));
+		assert(ret == small_bin2size_compute(binind));
+		return (ret);
+	}
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
 small_bin2size(size_t binind)
 {
 
-	return ((size_t)(small_bin2size_tab[binind]));
+	return (small_bin2size_lookup(binind)); /* Always table-driven. */
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+small_s2u_compute(size_t size)
+{
+#if (NTBINS > 0)
+	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
+		size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
+		size_t lg_ceil = lg_floor(pow2_ceil(size));
+		return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) :
+		    (ZU(1) << lg_ceil));
+	} else
+#endif
+	{
+		size_t x = lg_floor((size<<1)-1); /* ceil(lg(size)) */
+		size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
+		    ?  LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
+		size_t delta = ZU(1) << lg_delta;
+		size_t delta_mask = delta - 1;
+		size_t usize = (size + delta_mask) & ~delta_mask;
+		return (usize); /* size rounded up to a multiple of delta. */
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+small_s2u_lookup(size_t size)
+{
+	size_t ret = (small_bin2size(small_size2bin(size)));
+	/* Round-trip through the bin index; assert checks the computed form. */
+	assert(ret == small_s2u_compute(size));
+	return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+small_s2u(size_t size)
+{
+	/* Use the tables up to LOOKUP_MAXCLASS, arithmetic beyond that. */
+	assert(size > 0);
+	if (size <= LOOKUP_MAXCLASS)
+		return (small_s2u_lookup(size));
+	else
+		return (small_s2u_compute(size));
 }
 #  endif /* JEMALLOC_ARENA_INLINE_A */
 
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index cf20f1f..491345c 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -475,7 +475,7 @@
 {
 
 	if (size <= SMALL_MAXCLASS)
-		return (small_bin2size(small_size2bin(size)));
+		return (small_s2u(size));
 	if (size <= arena_maxclass)
 		return (PAGE_CEILING(size));
 	return (CHUNK_CEILING(size));
@@ -518,7 +518,7 @@
 
 	if (usize <= arena_maxclass && alignment <= PAGE) {
 		if (usize <= SMALL_MAXCLASS)
-			return (small_bin2size(small_size2bin(usize)));
+			return (small_s2u(usize));
 		return (PAGE_CEILING(usize));
 	} else {
 		size_t run_size;
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index 09ddd4f..a9a50f1 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -48,6 +48,11 @@
 #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8
 
 /*
+ * Defined if __builtin_clz() and __builtin_clzl() are available.
+ */
+#undef JEMALLOC_HAVE_BUILTIN_CLZ
+
+/*
  * Defined if OSSpin*() functions are available, as provided by Darwin, and
  * documented in the spinlock(3) manual page.
  */
diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h
index 4e23923..38e2886 100644
--- a/include/jemalloc/internal/jemalloc_internal_macros.h
+++ b/include/jemalloc/internal/jemalloc_internal_macros.h
@@ -39,9 +39,15 @@
 #endif
 
 #define	ZU(z)	((size_t)z)
+#define	ZI(z)	((ssize_t)z)
 #define	QU(q)	((uint64_t)q)
 #define	QI(q)	((int64_t)q)
 
+#define	KZU(z)	ZU(z##ULL)	/* Unsigned size_t constant. */
+#define	KZI(z)	ZI(z##LL)	/* Signed ssize_t constant. */
+#define	KQU(q)	QU(q##ULL)	/* Unsigned 64-bit constant. */
+#define	KQI(q)	QI(q##LL)	/* Signed 64-bit constant. */
+
 #ifndef __DECONST
 #  define	__DECONST(type, var)	((type)(uintptr_t)(const void *)(var))
 #endif
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index f6c4fbc..3401301 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -234,6 +234,7 @@
 jemalloc_postfork_child
 jemalloc_postfork_parent
 jemalloc_prefork
+lg_floor
 malloc_cprintf
 malloc_mutex_init
 malloc_mutex_lock
@@ -348,8 +349,15 @@
 sa2u
 set_errno
 small_bin2size
+small_bin2size_compute
+small_bin2size_lookup
 small_bin2size_tab
+small_s2u
+small_s2u_compute
+small_s2u_lookup
 small_size2bin
+small_size2bin_compute
+small_size2bin_lookup
 small_size2bin_tab
 stats_cactive
 stats_cactive_add
diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh
index 960674a..3edebf2 100755
--- a/include/jemalloc/internal/size_classes.sh
+++ b/include/jemalloc/internal/size_classes.sh
@@ -2,16 +2,23 @@
 
 # The following limits are chosen such that they cover all supported platforms.
 
-# Range of quanta.
-lg_qmin=3
-lg_qmax=4
+# Pointer sizes.
+lg_zarr="2 3"
+
+# Quanta.
+lg_qarr="3 4"
 
 # The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)].
 lg_tmin=3
 
-# Range of page sizes.
-lg_pmin=12
-lg_pmax=16
+# Maximum lookup size.
+lg_kmax=12
+
+# Page sizes.
+lg_parr="12 13 16"
+
+# Size class group size (number of size classes for each size doubling).
+lg_g=2
 
 pow2() {
   e=$1
@@ -22,68 +29,206 @@
   done
 }
 
+lg() { # Sets lg_result to floor(log2($1)); stays 0 when $1 <= 1.
+  x=$1
+  lg_result=0
+  while [ ${x} -gt 1 ] ; do
+    lg_result=$((${lg_result} + 1))
+    x=$((${x} / 2))
+  done
+}
+
+size_class() {
+  index=$1
+  lg_grp=$2
+  lg_delta=$3
+  ndelta=$4
+  lg_p=$5
+  lg_kmax=$6
+  # rem="yes" when ndelta is not an exact power of two.
+  lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta}
+  if [ ${pow2_result} -lt ${ndelta} ] ; then
+    rem="yes"
+  else
+    rem="no"
+  fi
+  # lg_size is lg_grp + 1 only when lg_delta + lg_ndelta equals lg_grp.
+  lg_size=${lg_grp}
+  if [ $((${lg_delta} + ${lg_ndelta})) -eq ${lg_grp} ] ; then
+    lg_size=$((${lg_grp} + 1))
+  else
+    lg_size=${lg_grp}
+    rem="yes"
+  fi
+  # bin: lg_size < lg_p.  Lookup: lg_size < lg_kmax, or equal with no rem.
+  if [ ${lg_size} -lt ${lg_p} ] ; then
+    bin="yes"
+  else
+    bin="no"
+  fi
+  if [ ${lg_size} -lt ${lg_kmax} \
+      -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then
+    lg_delta_lookup=${lg_delta}
+  else
+    lg_delta_lookup="no"
+  fi
+  printf '    SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup}
+  # Defined upon return:
+  # - lg_delta_lookup (${lg_delta} or "no")
+  # - bin ("yes" or "no")
+}
+
+sep_line() { # Emits a whitespace-only line ending in a backslash.
+  echo "                                               \\"
+}
+
+size_classes() {
+  lg_z=$1
+  lg_q=$2
+  lg_t=$3
+  lg_p=$4
+  lg_g=$5
+  # ptr_bits = 2^(lg_z+3); g = 2^lg_g.  lg_kmax is read from global scope.
+  pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result}
+  pow2 ${lg_g}; g=${pow2_result}
+
+  echo "#define	SIZE_CLASSES \\"
+  echo "  /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\"
+  # Counters and limits reported to the caller via global variables.
+  ntbins=0
+  nlbins=0
+  lg_tiny_maxclass='"NA"'
+  nbins=0
+
+  # Tiny size classes.
+  ndelta=0
+  index=0
+  lg_grp=${lg_t}
+  lg_delta=${lg_grp}
+  while [ ${lg_grp} -lt ${lg_q} ] ; do
+    size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+    if [ ${lg_delta_lookup} != "no" ] ; then
+      nlbins=$((${index} + 1))
+    fi
+    if [ ${bin} != "no" ] ; then
+      nbins=$((${index} + 1))
+    fi
+    ntbins=$((${ntbins} + 1))
+    lg_tiny_maxclass=${lg_grp} # Final written value is correct.
+    index=$((${index} + 1))
+    lg_delta=${lg_grp}
+    lg_grp=$((${lg_grp} + 1))
+  done
+
+  # First non-tiny group.
+  if [ ${ntbins} -gt 0 ] ; then
+    sep_line
+    # The first size class has an unusual encoding, because the size has to be
+    # split between grp and delta*ndelta.
+    lg_grp=$((${lg_grp} - 1))
+    ndelta=1
+    size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+    index=$((${index} + 1))
+    lg_grp=$((${lg_grp} + 1))
+    lg_delta=$((${lg_delta} + 1))
+  fi
+  while [ ${ndelta} -lt ${g} ] ; do
+    size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+    index=$((${index} + 1))
+    ndelta=$((${ndelta} + 1))
+  done
+
+  # All remaining groups.
+  lg_grp=$((${lg_grp} + ${lg_g}))
+  while [ ${lg_grp} -lt ${ptr_bits} ] ; do
+    sep_line
+    ndelta=1
+    if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then
+      ndelta_limit=$((${g} - 1))
+    else
+      ndelta_limit=${g}
+    fi
+    while [ ${ndelta} -le ${ndelta_limit} ] ; do
+      size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+      if [ ${lg_delta_lookup} != "no" ] ; then
+        nlbins=$((${index} + 1))
+        # Final written value is correct:
+        lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
+      fi
+      if [ ${bin} != "no" ] ; then
+        nbins=$((${index} + 1))
+        # Final written value is correct:
+        small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
+      fi
+      index=$((${index} + 1))
+      ndelta=$((${ndelta} + 1))
+    done
+    lg_grp=$((${lg_grp} + 1))
+    lg_delta=$((${lg_delta} + 1))
+  done
+  echo
+
+  # Defined upon completion:
+  # - ntbins
+  # - nlbins
+  # - nbins
+  # - lg_tiny_maxclass
+  # - lookup_maxclass
+  # - small_maxclass
+}
+
 cat <<EOF
 /* This file was automatically generated by size_classes.sh. */
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
+/*
+ * This header requires LG_SIZEOF_PTR, LG_TINY_MIN, LG_QUANTUM, and LG_PAGE to
+ * be defined prior to inclusion, and it in turn defines:
+ *
+ *   LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling.
+ *   SIZE_CLASSES: Complete table of SC(index, lg_grp, lg_delta, ndelta,
+ *                 bin, lg_delta_lookup) tuples.
+ *     index: Size class index.
+ *     lg_grp: Lg group base size (no deltas added).
+ *     lg_delta: Lg delta to previous size class.
+ *     ndelta: Delta multiplier.  size == (1<<lg_grp) + (ndelta<<lg_delta)
+ *     bin: 'yes' if a small bin size class, 'no' otherwise.
+ *     lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no'
+ *                      otherwise.
+ *   NTBINS: Number of tiny bins.
+ *   NLBINS: Number of bins supported by the lookup table.
+ *   NBINS: Number of small size class bins.
+ *   LG_TINY_MAXCLASS: Lg of maximum tiny size class.
+ *   LOOKUP_MAXCLASS: Maximum size class included in lookup table.
+ *   SMALL_MAXCLASS: Maximum small size class.
+ */
+
+#define	LG_SIZE_CLASS_GROUP	${lg_g}
+
 EOF
 
-lg_q=${lg_qmin}
-while [ ${lg_q} -le ${lg_qmax} ] ; do
-  lg_t=${lg_tmin}
-  while [ ${lg_t} -le ${lg_q} ] ; do
-    lg_p=${lg_pmin}
-    while [ ${lg_p} -le ${lg_pmax} ] ; do
-      echo "#if (LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
-      echo "#define	SIZE_CLASSES_DEFINED"
-      pow2 ${lg_q}; q=${pow2_result}
-      pow2 ${lg_t}; t=${pow2_result}
-      pow2 ${lg_p}; p=${pow2_result}
-      bin=0
-      psz=0
-      sz=${t}
-      delta=$((${sz} - ${psz}))
-      echo "/*  SIZE_CLASS(bin,	delta,	sz) */"
-      echo "#define	SIZE_CLASSES							\\"
-
-      # Tiny size classes.
-      while [ ${sz} -lt ${q} ] ; do
-        echo "    SIZE_CLASS(${bin},	${delta},	${sz})					\\"
-        bin=$((${bin} + 1))
-        psz=${sz}
-        sz=$((${sz} + ${sz}))
-        delta=$((${sz} - ${psz}))
+for lg_z in ${lg_zarr} ; do
+  for lg_q in ${lg_qarr} ; do
+    lg_t=${lg_tmin}
+    while [ ${lg_t} -le ${lg_q} ] ; do
+      # Iterate through page sizes and compute how many bins there are.
+      for lg_p in ${lg_parr} ; do
+        echo "#if (LG_SIZEOF_PTR == ${lg_z} && LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
+        size_classes ${lg_z} ${lg_q} ${lg_t} ${lg_p} ${lg_g}
+        echo "#define	SIZE_CLASSES_DEFINED"
+        echo "#define	NTBINS			${ntbins}"
+        echo "#define	NLBINS			${nlbins}"
+        echo "#define	NBINS			${nbins}"
+        echo "#define	LG_TINY_MAXCLASS	${lg_tiny_maxclass}"
+        echo "#define	LOOKUP_MAXCLASS		${lookup_maxclass}"
+        echo "#define	SMALL_MAXCLASS		${small_maxclass}"
+        echo "#endif"
+        echo
       done
-      # Quantum-multiple size classes.  For each doubling of sz, as many as 4
-      # size classes exist.  Their spacing is the greater of:
-      # - q
-      # - sz/4, where sz is a power of 2
-      while [ ${sz} -lt ${p} ] ; do
-        if [ ${sz} -ge $((${q} * 4)) ] ; then
-          i=$((${sz} / 4))
-        else
-          i=${q}
-        fi
-        next_2pow=$((${sz} * 2))
-        while [ ${sz} -lt $next_2pow ] ; do
-          echo "    SIZE_CLASS(${bin},	${delta},	${sz})					\\"
-          bin=$((${bin} + 1))
-          psz=${sz}
-          sz=$((${sz} + ${i}))
-          delta=$((${sz} - ${psz}))
-        done
-      done
-      echo
-      echo "#define	NBINS		${bin}"
-      echo "#define	SMALL_MAXCLASS	${psz}"
-      echo "#endif"
-      echo
-      lg_p=$((${lg_p} + 1))
+      lg_t=$((${lg_t} + 1))
     done
-    lg_t=$((${lg_t} + 1))
   done
-  lg_q=$((${lg_q} + 1))
 done
 
 cat <<EOF
diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h
index 6b938f7..7864823 100644
--- a/include/jemalloc/internal/util.h
+++ b/include/jemalloc/internal/util.h
@@ -110,6 +110,7 @@
 
 #ifndef JEMALLOC_ENABLE_INLINE
 size_t	pow2_ceil(size_t x);
+size_t	lg_floor(size_t x);
 void	set_errno(int errnum);
 int	get_errno(void);
 #endif
@@ -133,6 +134,52 @@
 	return (x);
 }
 
+#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
+JEMALLOC_INLINE size_t
+lg_floor(size_t x)
+{
+	size_t ret;
+	/* floor(lg(x)) via bit-scan-reverse; result is undefined if x == 0. */
+	asm ("bsr %1, %0"
+	    : "=r"(ret) // Outputs.
+	    : "r"(x)    // Inputs.
+	    );
+	return (ret);
+}
+#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
+JEMALLOC_INLINE size_t
+lg_floor(size_t x)
+{
+	/* floor(lg(x)) == (bit width - 1) - leading zeros; x must be nonzero. */
+#if (LG_SIZEOF_PTR == LG_SIZEOF_INT)
+	return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x));
+#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG)
+	return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x));
+#else
+#  error "Unsupported type sizes for lg_floor()"
+#endif
+}
+#else
+JEMALLOC_INLINE size_t
+lg_floor(size_t x)
+{
+	/* Smear the top bit down; the lowest clear bit is then lg + 1. */
+	x |= (x >> 1);
+	x |= (x >> 2);
+	x |= (x >> 4);
+	x |= (x >> 8);
+	x |= (x >> 16);
+#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG)
+	x |= (x >> 32);
+	return ((~x == 0) ? 63 : (size_t)ffsl((long)~x) - 2);
+#elif (LG_SIZEOF_PTR == 2)
+	return ((~x == 0) ? 31 : (size_t)ffs((int)~x) - 2);
+#else
+#  error "Unsupported type sizes for lg_floor()"
+#endif
+}
+#endif
+
 /* Sets error code */
 JEMALLOC_INLINE void
 set_errno(int errnum)
diff --git a/src/arena.c b/src/arena.c
index f5d7d06..c392419 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -9,40 +9,39 @@
 
 JEMALLOC_ALIGNED(CACHELINE)
 const uint32_t	small_bin2size_tab[NBINS] = {
-#define SIZE_CLASS(bin, delta, size)		\
+#define	B2S_bin_yes(size) \
 	size,
+#define	B2S_bin_no(size)	/* Non-bin classes add no table entry. */
+#define	SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
+	B2S_bin_##bin((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))
 	SIZE_CLASSES
-#undef SIZE_CLASS
+#undef B2S_bin_yes
+#undef B2S_bin_no
+#undef SC
 };
 
 JEMALLOC_ALIGNED(CACHELINE)
 const uint8_t	small_size2bin_tab[] = {
-#define	S2B_8(i)	i,
-#define	S2B_16(i)	S2B_8(i) S2B_8(i)
-#define	S2B_32(i)	S2B_16(i) S2B_16(i)
-#define	S2B_64(i)	S2B_32(i) S2B_32(i)
-#define	S2B_128(i)	S2B_64(i) S2B_64(i)
-#define	S2B_256(i)	S2B_128(i) S2B_128(i)
-#define	S2B_512(i)	S2B_256(i) S2B_256(i)
-#define	S2B_1024(i)	S2B_512(i) S2B_512(i)
-#define	S2B_2048(i)	S2B_1024(i) S2B_1024(i)
-#define	S2B_4096(i)	S2B_2048(i) S2B_2048(i)
-#define	S2B_8192(i)	S2B_4096(i) S2B_4096(i)
-#define	SIZE_CLASS(bin, delta, size)					\
-	S2B_##delta(bin)
+#define	S2B_3(i)	i,	/* TODO confirm: assumes LG_TINY_MIN == 3. */
+#define	S2B_4(i)	S2B_3(i) S2B_3(i)	/* Doubles S2B_3. */
+#define	S2B_5(i)	S2B_4(i) S2B_4(i)
+#define	S2B_6(i)	S2B_5(i) S2B_5(i)
+#define	S2B_7(i)	S2B_6(i) S2B_6(i)
+#define	S2B_8(i)	S2B_7(i) S2B_7(i)
+#define	S2B_9(i)	S2B_8(i) S2B_8(i)
+#define	S2B_no(i)	/* Not a lookup class: emits nothing. */
+#define	SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
+	S2B_##lg_delta_lookup(index)
 	SIZE_CLASSES
+#undef S2B_3
+#undef S2B_4
+#undef S2B_5
+#undef S2B_6
+#undef S2B_7
 #undef S2B_8
-#undef S2B_16
-#undef S2B_32
-#undef S2B_64
-#undef S2B_128
-#undef S2B_256
-#undef S2B_512
-#undef S2B_1024
-#undef S2B_2048
-#undef S2B_4096
-#undef S2B_8192
-#undef SIZE_CLASS
+#undef S2B_9
+#undef S2B_no
+#undef SC
 };
 
 /******************************************************************************/
@@ -2586,13 +2585,18 @@
 	arena_bin_info_t *bin_info;
 	size_t prev_run_size = PAGE;
 
-#define	SIZE_CLASS(bin, delta, size)					\
-	bin_info = &arena_bin_info[bin];				\
+#define	BIN_INFO_INIT_bin_yes(index, size) \
+	bin_info = &arena_bin_info[index];				\
 	bin_info->reg_size = size;					\
 	prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\
 	bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
+#define	BIN_INFO_INIT_bin_no(index, size)
+#define	SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup)	\
+	BIN_INFO_INIT_bin_##bin(index, (ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))
 	SIZE_CLASSES
-#undef SIZE_CLASS
+#undef BIN_INFO_INIT_bin_yes
+#undef BIN_INFO_INIT_bin_no
+#undef SC
 }
 
 void