Implement psz2ind(), pind2sz(), and psz2u().

These compute size classes and indices analogously to size2index(),
index2size(), and s2u(), respectively, but over the subset of size
classes that are multiples of the page size.  Note that pszind_t and
szind_t are not interchangeable.
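
As a quick illustration of the indexing scheme the new functions
implement, the following is a minimal standalone sketch.  It assumes
LG_PAGE == 12 and LG_SIZE_CLASS_GROUP == 2 (both are configure-time
constants in jemalloc) and substitutes __builtin_clzll() for jemalloc's
lg_floor(); it reproduces the psz2ind()/pind2sz() arithmetic outside the
tree so the round-trip and rounding behavior can be observed directly,
and is not the in-tree implementation (no HUGE_MAXCLASS/NPSIZES overflow
handling):

/*
 * Sketch of the page size class math, under the assumptions stated
 * above.  Illustration only.
 */
#include <stddef.h>
#include <stdio.h>

#define LG_PAGE			12
#define LG_SIZE_CLASS_GROUP	2

static unsigned
lg_floor_sketch(size_t x)
{
	/* floor(log2(x)) for x > 0. */
	return ((unsigned)(63 - __builtin_clzll((unsigned long long)x)));
}

/* Map a request size to the index of the smallest page size class >= psz. */
static unsigned
psz2ind_sketch(size_t psz)
{
	unsigned x = lg_floor_sketch((psz << 1) - 1);
	unsigned shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 :
	    x - (LG_SIZE_CLASS_GROUP + LG_PAGE);
	unsigned grp = shift << LG_SIZE_CLASS_GROUP;
	unsigned lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
	    LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
	size_t delta_inverse_mask = (size_t)-1 << lg_delta;
	unsigned mod = (unsigned)((((psz - 1) & delta_inverse_mask) >>
	    lg_delta) & ((1U << LG_SIZE_CLASS_GROUP) - 1));

	return (grp + mod);
}

/* Map a page size class index back to that class's size in bytes. */
static size_t
pind2sz_sketch(unsigned pind)
{
	size_t grp = pind >> LG_SIZE_CLASS_GROUP;
	size_t mod = pind & ((1U << LG_SIZE_CLASS_GROUP) - 1);
	size_t grp_size = (grp == 0) ? 0 :
	    (((size_t)1 << (LG_PAGE + LG_SIZE_CLASS_GROUP - 1)) << grp);
	size_t lg_delta = ((grp == 0) ? 1 : grp) + (LG_PAGE - 1);

	return (grp_size + ((mod + 1) << lg_delta));
}

int
main(void)
{
	/* With the assumed constants: 4K, 8K, 12K, 16K, 20K, ..., 40K, ... */
	for (unsigned pind = 0; pind < 12; pind++) {
		size_t sz = pind2sz_sketch(pind);
		printf("pind=%2u  size=%6zu  psz2ind(size)=%2u  "
		    "psz2ind(size+1)=%2u\n", pind, sz, psz2ind_sketch(sz),
		    psz2ind_sketch(sz + 1));
	}
	return (0);
}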
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index a8c476d..224cedd 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -178,6 +178,9 @@
 
 #include "jemalloc/internal/jemalloc_internal_macros.h"
 
+/* Page size index type. */
+typedef unsigned pszind_t;
+
 /* Size class index type. */
 typedef unsigned szind_t;
 
@@ -525,6 +528,9 @@
 #include "jemalloc/internal/huge.h"
 
 #ifndef JEMALLOC_ENABLE_INLINE
+pszind_t	psz2ind(size_t psz);
+size_t	pind2sz(pszind_t pind);
+size_t	psz2u(size_t psz);
 szind_t	size2index_compute(size_t size);
 szind_t	size2index_lookup(size_t size);
 szind_t	size2index(size_t size);
@@ -545,10 +551,74 @@
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+JEMALLOC_INLINE pszind_t
+psz2ind(size_t psz)
+{
+
+	if (unlikely(psz > HUGE_MAXCLASS))
+		return (NPSIZES);
+	{
+		pszind_t x = lg_floor((psz<<1)-1);
+		pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x -
+		    (LG_SIZE_CLASS_GROUP + LG_PAGE);
+		pszind_t grp = shift << LG_SIZE_CLASS_GROUP;
+
+		pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
+		    LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
+
+		size_t delta_inverse_mask = ZI(-1) << lg_delta;
+		pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) &
+		    ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+		pszind_t ind = grp + mod;
+		return (ind);
+	}
+}
+
+JEMALLOC_INLINE size_t
+pind2sz(pszind_t pind)
+{
+
+	{
+		size_t grp = pind >> LG_SIZE_CLASS_GROUP;
+		size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+		size_t grp_size_mask = ~((!!grp)-1);
+		size_t grp_size = ((ZU(1) << (LG_PAGE +
+		    (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
+
+		size_t shift = (grp == 0) ? 1 : grp;
+		size_t lg_delta = shift + (LG_PAGE-1);
+		size_t mod_size = (mod+1) << lg_delta;
+
+		size_t sz = grp_size + mod_size;
+		return (sz);
+	}
+}
+
+JEMALLOC_INLINE size_t
+psz2u(size_t psz)
+{
+
+	if (unlikely(psz > HUGE_MAXCLASS))
+		return (0);
+	{
+		size_t x = lg_floor((psz<<1)-1);
+		size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
+		    LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
+		size_t delta = ZU(1) << lg_delta;
+		size_t delta_mask = delta - 1;
+		size_t usize = (psz + delta_mask) & ~delta_mask;
+		return (usize);
+	}
+}
+
 JEMALLOC_INLINE szind_t
 size2index_compute(size_t size)
 {
 
+	if (unlikely(size > HUGE_MAXCLASS))
+		return (NSIZES);
 #if (NTBINS != 0)
 	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
 		szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
@@ -557,9 +627,7 @@
 	}
 #endif
 	{
-		szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
-		    (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
-		    : lg_floor((size<<1)-1);
+		szind_t x = lg_floor((size<<1)-1);
 		szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
 		    x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
 		szind_t grp = shift << LG_SIZE_CLASS_GROUP;
@@ -645,6 +713,8 @@
 s2u_compute(size_t size)
 {
 
+	if (unlikely(size > HUGE_MAXCLASS))
+		return (0);
 #if (NTBINS > 0)
 	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
 		size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
@@ -654,9 +724,7 @@
 	}
 #endif
 	{
-		size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
-		    (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
-		    : lg_floor((size<<1)-1);
+		size_t x = lg_floor((size<<1)-1);
 		size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
 		    ?  LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
 		size_t delta = ZU(1) << lg_delta;
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index 8993342..cbafc2b 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -393,6 +393,7 @@
 pages_purge
 pages_trim
 pages_unmap
+pind2sz
 pow2_ceil_u32
 pow2_ceil_u64
 pow2_ceil_zu
@@ -446,6 +447,8 @@
 prof_thread_active_set
 prof_thread_name_get
 prof_thread_name_set
+psz2ind
+psz2u
 purge_mode_names
 register_zone
 rtree_child_read
diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh
index c9b8471..ecee1a0 100755
--- a/include/jemalloc/internal/size_classes.sh
+++ b/include/jemalloc/internal/size_classes.sh
@@ -78,6 +78,21 @@
   lg_p=$5
   lg_kmax=$6
 
+  if [ ${lg_delta} -ge ${lg_p} ] ; then
+    psz="yes"
+  else
+    pow2 ${lg_p}; p=${pow2_result}
+    pow2 ${lg_grp}; grp=${pow2_result}
+    pow2 ${lg_delta}; delta=${pow2_result}
+    sz=$((${grp} + ${delta} * ${ndelta}))
+    npgs=$((${sz} / ${p}))
+    if [ ${sz} -eq $((${npgs} * ${p})) ] ; then
+      psz="yes"
+    else
+      psz="no"
+    fi
+  fi
+
   lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta}
   if [ ${pow2_result} -lt ${ndelta} ] ; then
     rem="yes"
@@ -106,15 +121,16 @@
   else
     lg_delta_lookup="no"
   fi
-  printf '    SC(%3d, %6d, %8d, %6d, %3s, %3d, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${pgs} ${lg_delta_lookup}
+  printf '    SC(%3d, %6d, %8d, %6d, %3s, %3s, %3d, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${pgs} ${lg_delta_lookup}
   # Defined upon return:
+  # - psz ("yes" or "no")
   # - bin ("yes" or "no")
   # - pgs
   # - lg_delta_lookup (${lg_delta} or "no")
 }
 
 sep_line() {
-  echo "                                                    \\"
+  echo "                                                         \\"
 }
 
 size_classes() {
@@ -128,12 +144,13 @@
   pow2 ${lg_g}; g=${pow2_result}
 
   echo "#define	SIZE_CLASSES \\"
-  echo "  /* index, lg_grp, lg_delta, ndelta, bin, pgs, lg_delta_lookup */ \\"
+  echo "  /* index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup */ \\"
 
   ntbins=0
   nlbins=0
   lg_tiny_maxclass='"NA"'
   nbins=0
+  npsizes=0
   slab_maxpgs=0
 
   # Tiny size classes.
@@ -146,6 +163,9 @@
     if [ ${lg_delta_lookup} != "no" ] ; then
       nlbins=$((${index} + 1))
     fi
+    if [ ${psz} = "yes" ] ; then
+      npsizes=$((${npsizes} + 1))
+    fi
     if [ ${bin} != "no" ] ; then
       nbins=$((${index} + 1))
       if [ ${pgs} -gt ${slab_maxpgs} ] ; then
@@ -170,6 +190,9 @@
     index=$((${index} + 1))
     lg_grp=$((${lg_grp} + 1))
     lg_delta=$((${lg_delta} + 1))
+    if [ ${psz} = "yes" ] ; then
+      npsizes=$((${npsizes} + 1))
+    fi
     if [ ${pgs} -gt ${slab_maxpgs} ] ; then
       slab_maxpgs=${pgs}
     fi
@@ -178,6 +201,9 @@
     size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
     index=$((${index} + 1))
     ndelta=$((${ndelta} + 1))
+    if [ ${psz} = "yes" ] ; then
+      npsizes=$((${npsizes} + 1))
+    fi
     if [ ${pgs} -gt ${slab_maxpgs} ] ; then
       slab_maxpgs=${pgs}
     fi
@@ -200,6 +226,9 @@
         # Final written value is correct:
         lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
       fi
+      if [ ${psz} = "yes" ] ; then
+        npsizes=$((${npsizes} + 1))
+      fi
       if [ ${bin} != "no" ] ; then
         nbins=$((${index} + 1))
         # Final written value is correct:
@@ -229,6 +258,7 @@
   # - nlbins
   # - nbins
   # - nsizes
+  # - npsizes
   # - lg_tiny_maxclass
   # - lookup_maxclass
   # - small_maxclass
@@ -247,12 +277,13 @@
  * be defined prior to inclusion, and it in turn defines:
  *
  *   LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling.
- *   SIZE_CLASSES: Complete table of SC(index, lg_grp, lg_delta, ndelta, bin,
- *                 pgs, lg_delta_lookup) tuples.
+ *   SIZE_CLASSES: Complete table of SC(index, lg_grp, lg_delta, ndelta, psz,
+ *                 bin, pgs, lg_delta_lookup) tuples.
  *     index: Size class index.
  *     lg_grp: Lg group base size (no deltas added).
  *     lg_delta: Lg delta to previous size class.
  *     ndelta: Delta multiplier.  size == 1<<lg_grp + ndelta<<lg_delta
+ *     psz: 'yes' if a multiple of the page size, 'no' otherwise.
  *     bin: 'yes' if a small bin size class, 'no' otherwise.
  *     pgs: Run page count if a small bin size class, 0 otherwise.
  *     lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no'
@@ -261,6 +292,7 @@
  *   NLBINS: Number of bins supported by the lookup table.
  *   NBINS: Number of small size class bins.
  *   NSIZES: Number of size classes.
+ *   NPSIZES: Number of size classes that are a multiple of (1U << LG_PAGE).
  *   LG_TINY_MAXCLASS: Lg of maximum tiny size class.
  *   LOOKUP_MAXCLASS: Maximum size class included in lookup table.
  *   SMALL_MAXCLASS: Maximum small size class.
@@ -286,6 +318,7 @@
         echo "#define	NLBINS			${nlbins}"
         echo "#define	NBINS			${nbins}"
         echo "#define	NSIZES			${nsizes}"
+        echo "#define	NPSIZES			${npsizes}"
         echo "#define	LG_TINY_MAXCLASS	${lg_tiny_maxclass}"
         echo "#define	LOOKUP_MAXCLASS		${lookup_maxclass}"
         echo "#define	SMALL_MAXCLASS		${small_maxclass}"
diff --git a/src/arena.c b/src/arena.c
index 7b9f313..ff119ba 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -19,7 +19,8 @@
 #define	BIN_INFO_bin_yes(reg_size, run_size, nregs)			\
 	{reg_size, run_size, nregs, BITMAP_INFO_INITIALIZER(nregs)},
 #define	BIN_INFO_bin_no(reg_size, run_size, nregs)
-#define	SC(index, lg_grp, lg_delta, ndelta, bin, pgs, lg_delta_lookup)	\
+#define	SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs,		\
+    lg_delta_lookup)							\
 	BIN_INFO_bin_##bin((1U<<lg_grp) + (ndelta<<lg_delta),		\
 	    (pgs << LG_PAGE), (pgs << LG_PAGE) / ((1U<<lg_grp) +	\
 	    (ndelta<<lg_delta)))
@@ -3511,7 +3512,7 @@
 		small_run_tab[bin_info->run_size >> LG_PAGE] = true;	\
 	}
 #define	TAB_INIT_bin_no(index, size)
-#define	SC(index, lg_grp, lg_delta, ndelta, bin, run_size,		\
+#define	SC(index, lg_grp, lg_delta, ndelta, psz, bin, run_size,		\
     lg_delta_lookup)							\
 	TAB_INIT_bin_##bin(index, (ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))
 	SIZE_CLASSES
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 4eec09b..b907d9e 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -81,7 +81,7 @@
 /* Last entry for overflow detection only.  */
 JEMALLOC_ALIGNED(CACHELINE)
 const size_t	index2size_tab[NSIZES+1] = {
-#define	SC(index, lg_grp, lg_delta, ndelta, bin, pgs, lg_delta_lookup) \
+#define	SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
 	((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)),
 	SIZE_CLASSES
 #undef SC
@@ -154,7 +154,7 @@
 #define	S2B_11(i)	S2B_10(i) S2B_10(i)
 #endif
 #define	S2B_no(i)
-#define	SC(index, lg_grp, lg_delta, ndelta, bin, pgs, lg_delta_lookup) \
+#define	SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
 	S2B_##lg_delta_lookup(index)
 	SIZE_CLASSES
 #undef S2B_3
diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c
index 2e2caaf..4e1e0ce 100644
--- a/test/unit/size_classes.c
+++ b/test/unit/size_classes.c
@@ -80,25 +80,96 @@
 }
 TEST_END
 
+TEST_BEGIN(test_psize_classes)
+{
+	size_t size_class, max_size_class;
+	pszind_t pind, max_pind;
+
+	max_size_class = get_max_size_class();
+	max_pind = psz2ind(max_size_class);
+
+	for (pind = 0, size_class = pind2sz(pind); pind < max_pind ||
+	    size_class < max_size_class; pind++, size_class =
+	    pind2sz(pind)) {
+		assert_true(pind < max_pind,
+		    "Loop conditionals should be equivalent; pind=%u, "
+		    "size_class=%zu (%#zx)", pind, size_class, size_class);
+		assert_true(size_class < max_size_class,
+		    "Loop conditionals should be equivalent; pind=%u, "
+		    "size_class=%zu (%#zx)", pind, size_class, size_class);
+
+		assert_u_eq(pind, psz2ind(size_class),
+		    "psz2ind() does not reverse pind2sz(): pind=%u -->"
+		    " size_class=%zu --> pind=%u --> size_class=%zu", pind,
+		    size_class, psz2ind(size_class),
+		    pind2sz(psz2ind(size_class)));
+		assert_zu_eq(size_class, pind2sz(psz2ind(size_class)),
+		    "pind2sz() does not reverse psz2ind(): pind=%u -->"
+		    " size_class=%zu --> pind=%u --> size_class=%zu", pind,
+		    size_class, psz2ind(size_class),
+		    pind2sz(psz2ind(size_class)));
+
+		assert_u_eq(pind+1, psz2ind(size_class+1),
+		    "Next size_class does not round up properly");
+
+		assert_zu_eq(size_class, (pind > 0) ?
+		    psz2u(pind2sz(pind-1)+1) : psz2u(1),
+		    "psz2u() does not round up to size class");
+		assert_zu_eq(size_class, psz2u(size_class-1),
+		    "psz2u() does not round up to size class");
+		assert_zu_eq(size_class, psz2u(size_class),
+		    "psz2u() does not compute same size class");
+		assert_zu_eq(psz2u(size_class+1), pind2sz(pind+1),
+		    "psz2u() does not round up to next size class");
+	}
+
+	assert_u_eq(pind, psz2ind(pind2sz(pind)),
+	    "psz2ind() does not reverse pind2sz()");
+	assert_zu_eq(max_size_class, pind2sz(psz2ind(max_size_class)),
+	    "pind2sz() does not reverse psz2ind()");
+
+	assert_zu_eq(size_class, psz2u(pind2sz(pind-1)+1),
+	    "psz2u() does not round up to size class");
+	assert_zu_eq(size_class, psz2u(size_class-1),
+	    "psz2u() does not round up to size class");
+	assert_zu_eq(size_class, psz2u(size_class),
+	    "psz2u() does not compute same size class");
+}
+TEST_END
+
 TEST_BEGIN(test_overflow)
 {
 	size_t max_size_class;
 
 	max_size_class = get_max_size_class();
 
-	assert_u_ge(size2index(max_size_class+1), NSIZES,
-	    "size2index() should return >= NSIZES on overflow");
-	assert_u_ge(size2index(ZU(PTRDIFF_MAX)+1), NSIZES,
-	    "size2index() should return >= NSIZES on overflow");
-	assert_u_ge(size2index(SIZE_T_MAX), NSIZES,
-	    "size2index() should return >= NSIZES on overflow");
+	assert_u_eq(size2index(max_size_class+1), NSIZES,
+	    "size2index() should return NSIZES on overflow");
+	assert_u_eq(size2index(ZU(PTRDIFF_MAX)+1), NSIZES,
+	    "size2index() should return NSIZES on overflow");
+	assert_u_eq(size2index(SIZE_T_MAX), NSIZES,
+	    "size2index() should return NSIZES on overflow");
 
-	assert_zu_gt(s2u(max_size_class+1), HUGE_MAXCLASS,
-	    "s2u() should return > HUGE_MAXCLASS for unsupported size");
-	assert_zu_gt(s2u(ZU(PTRDIFF_MAX)+1), HUGE_MAXCLASS,
-	    "s2u() should return > HUGE_MAXCLASS for unsupported size");
+	assert_zu_eq(s2u(max_size_class+1), 0,
+	    "s2u() should return 0 for unsupported size");
+	assert_zu_eq(s2u(ZU(PTRDIFF_MAX)+1), 0,
+	    "s2u() should return 0 for unsupported size");
 	assert_zu_eq(s2u(SIZE_T_MAX), 0,
 	    "s2u() should return 0 on overflow");
+
+	assert_u_eq(psz2ind(max_size_class+1), NPSIZES,
+	    "psz2ind() should return NPSIZES on overflow");
+	assert_u_eq(psz2ind(ZU(PTRDIFF_MAX)+1), NPSIZES,
+	    "psz2ind() should return NPSIZES on overflow");
+	assert_u_eq(psz2ind(SIZE_T_MAX), NPSIZES,
+	    "psz2ind() should return NPSIZES on overflow");
+
+	assert_zu_eq(psz2u(max_size_class+1), 0,
+	    "psz2u() should return 0 for unsupported size");
+	assert_zu_eq(psz2u(ZU(PTRDIFF_MAX)+1), 0,
+	    "psz2u() should return 0 for unsupported size");
+	assert_zu_eq(psz2u(SIZE_T_MAX), 0,
+	    "psz2u() should return 0 on overflow");
 }
 TEST_END
 
@@ -108,5 +179,6 @@
 
 	return (test(
 	    test_size_classes,
+	    test_psize_classes,
 	    test_overflow));
 }