[scudo][standalone] Compact pointers for Caches/Batches

This CL introduces configuration options that allow pointers to be
compacted in the thread-specific caches and transfer batches. With
the 64-bit Primary, this lets them use 32 bits of space instead of
64, cutting the size of the caches and batches by nearly half (and,
with it, the memory used by size class 0, which holds the transfer
batches). The cost is an additional read of the region information
on the fast path.
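
Concretely, a compact pointer is the block's offset within its
region, scaled down by a power of two. A minimal sketch of the two
conversions (mirroring the compactPtrInternal()/decompactPtrInternal()
helpers added to the 64-bit Primary below; CompactPtrT,
CompactPtrScale and uptr as in the patch):

  static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) {
    // Store the scaled offset of the block from its region base.
    return static_cast<CompactPtrT>((Ptr - Base) >> CompactPtrScale);
  }
  static uptr decompactPtrInternal(uptr Base, CompactPtrT CompactPtr) {
    // Reverse the transformation to recover the full address.
    return Base + (static_cast<uptr>(CompactPtr) << CompactPtrScale);
  }

With a u32 CompactPtrT and a scale of SCUDO_MIN_ALIGNMENT_LOG (4 on
64-bit platforms), offsets of up to 2^36 bytes can be represented,
well above the 2^27 to 2^30 region sizes used by the compacting
configurations in this change.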

This is not a new idea: it is already used by the sanitizer_common
64-bit primary. The difference here is that the behavior is
configurable via the allocator config, including the possibility of
not compacting at all.

This CL enables pointer compaction in the Android (including the
Svelte variant) and Fuchsia default configurations.
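
A configuration opts in by picking a narrow compact pointer type and
a scale, and opts out by keeping uptr with a scale of 0 (hypothetical
fragments, following the entries this change adds to the configs):

  // Compacted: cache and batch entries shrink to 32 bits each.
  typedef u32 PrimaryCompactPtrT;
  static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;

  // Uncompacted: full 64-bit pointers, no extra region info read.
  typedef uptr PrimaryCompactPtrT;
  static const uptr PrimaryCompactPtrScale = 0;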

Differential Revision: https://reviews.llvm.org/D96435

GitOrigin-RevId: 2c56776a319edf33505ca6c7f9be59657cdaf52b
Change-Id: Ie83c8887eaaaef3cd97dbe5b7fd641b4477c9eec
diff --git a/standalone/allocator_config.h b/standalone/allocator_config.h
index 58c2e35..8e103f2 100644
--- a/standalone/allocator_config.h
+++ b/standalone/allocator_config.h
@@ -21,6 +21,35 @@
 
 namespace scudo {
 
+// The combined allocator uses a structure as a template argument that
+// specifies the configuration options for the various subcomponents of the
+// allocator.
+//
+// struct ExampleConfig {
+//   // SizeClassMap to use with the Primary.
+//   using SizeClassMap = DefaultSizeClassMap;
+//   // Indicates possible support for Memory Tagging.
+//   static const bool MaySupportMemoryTagging = false;
+//   // Defines the Primary allocator to use.
+//   typedef SizeClassAllocator64<ExampleConfig> Primary;
+//   // Log2 of the size of a size class region, as used by the Primary.
+//   static const uptr PrimaryRegionSizeLog = 30U;
+//   // Defines the type and scale of a compact pointer. A compact pointer can
+//   // be understood as the offset of a pointer within the region it belongs
+//   // to, in increments of a power-of-2 scale.
+//   // eg: Ptr = Base + (CompactPtr << Scale).
+//   typedef u32 PrimaryCompactPtrT;
+//   static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
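+//   // eg: u32 with a scale of 4 can address a region of up to 2^36 bytes.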
+//   // Defines the minimal & maximal release interval that can be set.
+//   static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
+//   static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
+//   // Defines the type of cache used by the Secondary. Some additional
+//   // configuration entries can be necessary depending on the Cache.
+//   typedef MapAllocatorNoCache SecondaryCache;
+//   // Thread-Specific Data Registry to use, shared or exclusive.
+//   template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 8U, 4U>;
+// };
+
 // Default configurations for various platforms.
 
 struct DefaultConfig {
@@ -29,10 +58,13 @@
 
 #if SCUDO_CAN_USE_PRIMARY64
   typedef SizeClassAllocator64<DefaultConfig> Primary;
-  static const uptr PrimaryRegionSizeLog = 30U;
+  static const uptr PrimaryRegionSizeLog = 32U;
+  typedef uptr PrimaryCompactPtrT;
+  static const uptr PrimaryCompactPtrScale = 0;
 #else
   typedef SizeClassAllocator32<DefaultConfig> Primary;
   static const uptr PrimaryRegionSizeLog = 19U;
+  typedef uptr PrimaryCompactPtrT;
 #endif
   static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
   static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
@@ -55,9 +87,12 @@
 #if SCUDO_CAN_USE_PRIMARY64
   typedef SizeClassAllocator64<AndroidConfig> Primary;
   static const uptr PrimaryRegionSizeLog = 28U;
+  typedef u32 PrimaryCompactPtrT;
+  static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
 #else
   typedef SizeClassAllocator32<AndroidConfig> Primary;
   static const uptr PrimaryRegionSizeLog = 18U;
+  typedef uptr PrimaryCompactPtrT;
 #endif
   static const s32 PrimaryMinReleaseToOsIntervalMs = 1000;
   static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000;
@@ -81,9 +116,12 @@
 #if SCUDO_CAN_USE_PRIMARY64
   typedef SizeClassAllocator64<AndroidSvelteConfig> Primary;
   static const uptr PrimaryRegionSizeLog = 27U;
+  typedef u32 PrimaryCompactPtrT;
+  static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
 #else
   typedef SizeClassAllocator32<AndroidSvelteConfig> Primary;
   static const uptr PrimaryRegionSizeLog = 16U;
+  typedef uptr PrimaryCompactPtrT;
 #endif
   static const s32 PrimaryMinReleaseToOsIntervalMs = 1000;
   static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000;
@@ -107,6 +145,8 @@
 
   typedef SizeClassAllocator64<FuchsiaConfig> Primary;
   static const uptr PrimaryRegionSizeLog = 30U;
+  typedef u32 PrimaryCompactPtrT;
+  static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
   static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
   static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
 
diff --git a/standalone/local_cache.h b/standalone/local_cache.h
index 089aeb9..8ee2b1c 100644
--- a/standalone/local_cache.h
+++ b/standalone/local_cache.h
@@ -17,24 +17,25 @@
 
 template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
   typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;
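+  // Caches and batches now hold CompactPtrT entries, which may be narrower
+  // than a pointer; the allocator converts via compactPtr()/decompactPtr().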
+  typedef typename SizeClassAllocator::CompactPtrT CompactPtrT;
 
   struct TransferBatch {
     static const u32 MaxNumCached = SizeClassMap::MaxNumCachedHint;
-    void setFromArray(void **Array, u32 N) {
+    void setFromArray(CompactPtrT *Array, u32 N) {
       DCHECK_LE(N, MaxNumCached);
       Count = N;
-      memcpy(Batch, Array, sizeof(void *) * Count);
+      memcpy(Batch, Array, sizeof(Batch[0]) * Count);
     }
     void clear() { Count = 0; }
-    void add(void *P) {
+    void add(CompactPtrT P) {
       DCHECK_LT(Count, MaxNumCached);
       Batch[Count++] = P;
     }
-    void copyToArray(void **Array) const {
-      memcpy(Array, Batch, sizeof(void *) * Count);
+    void copyToArray(CompactPtrT *Array) const {
+      memcpy(Array, Batch, sizeof(Batch[0]) * Count);
     }
     u32 getCount() const { return Count; }
-    void *get(u32 I) const {
+    CompactPtrT get(u32 I) const {
       DCHECK_LE(I, Count);
       return Batch[I];
     }
@@ -45,7 +46,7 @@
 
   private:
     u32 Count;
-    void *Batch[MaxNumCached];
+    CompactPtrT Batch[MaxNumCached];
   };
 
   void initLinkerInitialized(GlobalStats *S, SizeClassAllocator *A) {
@@ -78,13 +79,10 @@
     // Count, while Chunks might be further off (depending on Count). That keeps
     // the memory accesses in close quarters.
     const uptr ClassSize = C->ClassSize;
-    void *P = C->Chunks[--C->Count];
-    // The jury is still out as to whether any kind of PREFETCH here increases
-    // performance. It definitely decreases performance on Android though.
-    // if (!SCUDO_ANDROID) PREFETCH(P);
+    CompactPtrT CompactP = C->Chunks[--C->Count];
     Stats.add(StatAllocated, ClassSize);
     Stats.sub(StatFree, ClassSize);
-    return P;
+    return Allocator->decompactPtr(ClassId, CompactP);
   }
 
   void deallocate(uptr ClassId, void *P) {
@@ -97,7 +95,8 @@
       drain(C, ClassId);
     // See comment in allocate() about memory accesses.
     const uptr ClassSize = C->ClassSize;
-    C->Chunks[C->Count++] = P;
+    C->Chunks[C->Count++] =
+        Allocator->compactPtr(ClassId, reinterpret_cast<uptr>(P));
     Stats.sub(StatAllocated, ClassSize);
     Stats.add(StatFree, ClassSize);
   }
@@ -124,7 +123,7 @@
     u32 Count;
     u32 MaxCount;
     uptr ClassSize;
-    void *Chunks[2 * TransferBatch::MaxNumCached];
+    CompactPtrT Chunks[2 * TransferBatch::MaxNumCached];
   };
   PerClass PerClassArray[NumClasses];
   LocalStats Stats;
@@ -166,7 +165,8 @@
 
   NOINLINE void drain(PerClass *C, uptr ClassId) {
     const u32 Count = Min(C->MaxCount / 2, C->Count);
-    TransferBatch *B = createBatch(ClassId, C->Chunks[0]);
+    TransferBatch *B =
+        createBatch(ClassId, Allocator->decompactPtr(ClassId, C->Chunks[0]));
     if (UNLIKELY(!B))
       reportOutOfMemory(
           SizeClassAllocator::getSizeByClassId(SizeClassMap::BatchClassId));
diff --git a/standalone/primary32.h b/standalone/primary32.h
index a88a2a6..5b62c15 100644
--- a/standalone/primary32.h
+++ b/standalone/primary32.h
@@ -41,6 +41,7 @@
 
 template <typename Config> class SizeClassAllocator32 {
 public:
+  typedef typename Config::PrimaryCompactPtrT CompactPtrT;
   typedef typename Config::SizeClassMap SizeClassMap;
   // The bytemap can only track UINT8_MAX - 1 classes.
   static_assert(SizeClassMap::LargestClassId <= (UINT8_MAX - 1), "");
@@ -67,7 +68,7 @@
 
     u32 Seed;
     const u64 Time = getMonotonicTime();
-    if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
+    if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))
       Seed = static_cast<u32>(
           Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6));
     for (uptr I = 0; I < NumClasses; I++) {
@@ -102,6 +103,14 @@
     PossibleRegions.unmapTestOnly();
   }
 
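+  // The 32-bit Primary does not compact pointers: CompactPtrT is expected
+  // to be wide enough for a full pointer, so these are plain casts.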
+  CompactPtrT compactPtr(UNUSED uptr ClassId, uptr Ptr) const {
+    return static_cast<CompactPtrT>(Ptr);
+  }
+
+  void *decompactPtr(UNUSED uptr ClassId, CompactPtrT CompactPtr) const {
+    return reinterpret_cast<void *>(static_cast<uptr>(CompactPtr));
+  }
+
   TransferBatch *popBatch(CacheT *C, uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     SizeClassInfo *Sci = getSizeClassInfo(ClassId);
@@ -359,17 +368,18 @@
     // Fill the transfer batches and put them in the size-class freelist. We
     // need to randomize the blocks for security purposes, so we first fill a
     // local array that we then shuffle before populating the batches.
-    void *ShuffleArray[ShuffleArraySize];
+    CompactPtrT ShuffleArray[ShuffleArraySize];
     DCHECK_LE(NumberOfBlocks, ShuffleArraySize);
 
     uptr P = Region + Offset;
     for (u32 I = 0; I < NumberOfBlocks; I++, P += Size)
-      ShuffleArray[I] = reinterpret_cast<void *>(P);
+      ShuffleArray[I] = reinterpret_cast<CompactPtrT>(P);
     // No need to shuffle the batches size class.
     if (ClassId != SizeClassMap::BatchClassId)
       shuffle(ShuffleArray, NumberOfBlocks, &Sci->RandState);
     for (u32 I = 0; I < NumberOfBlocks;) {
-      TransferBatch *B = C->createBatch(ClassId, ShuffleArray[I]);
+      TransferBatch *B =
+          C->createBatch(ClassId, reinterpret_cast<void *>(ShuffleArray[I]));
       if (UNLIKELY(!B))
         return nullptr;
       const u32 N = Min(MaxCount, NumberOfBlocks - I);
@@ -435,7 +445,7 @@
     if (BlockSize < PageSize / 16U) {
       if (!Force && BytesPushed < Sci->AllocatedUser / 16U)
         return 0;
-      // We want 8x% to 9x% free bytes (the larger the bock, the lower the %).
+      // We want 8x% to 9x% free bytes (the larger the block, the lower the %).
       if ((BytesInFreeList * 100U) / Sci->AllocatedUser <
           (100U - 1U - BlockSize / 16U))
         return 0;
@@ -463,8 +473,11 @@
     auto SkipRegion = [this, First, ClassId](uptr RegionIndex) {
       return (PossibleRegions[First + RegionIndex] - 1U) != ClassId;
     };
-    releaseFreeMemoryToOS(Sci->FreeList, Base, RegionSize, NumberOfRegions,
-                          BlockSize, &Recorder, SkipRegion);
+    auto DecompactPtr = [](CompactPtrT CompactPtr) {
+      return reinterpret_cast<uptr>(CompactPtr);
+    };
+    releaseFreeMemoryToOS(Sci->FreeList, RegionSize, NumberOfRegions, BlockSize,
+                          &Recorder, DecompactPtr, SkipRegion);
     if (Recorder.getReleasedRangesCount() > 0) {
       Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
       Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
diff --git a/standalone/primary64.h b/standalone/primary64.h
index 16b1226..7b930c9 100644
--- a/standalone/primary64.h
+++ b/standalone/primary64.h
@@ -42,6 +42,8 @@
 
 template <typename Config> class SizeClassAllocator64 {
 public:
+  typedef typename Config::PrimaryCompactPtrT CompactPtrT;
+  static const uptr CompactPtrScale = Config::PrimaryCompactPtrScale;
   typedef typename Config::SizeClassMap SizeClassMap;
   typedef SizeClassAllocator64<Config> ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
@@ -62,7 +64,7 @@
 
     u32 Seed;
     const u64 Time = getMonotonicTime();
-    if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
+    if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))
       Seed = static_cast<u32>(Time ^ (PrimaryBase >> 12));
     const uptr PageSize = getPageSizeCached();
     for (uptr I = 0; I < NumClasses; I++) {
@@ -194,6 +196,24 @@
 
   static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); }
 
+  uptr getCompactPtrBaseByClassId(uptr ClassId) {
+    // If we are not compacting pointers, base everything off of 0.
+    if (sizeof(CompactPtrT) == sizeof(uptr) && CompactPtrScale == 0)
+      return 0;
+    return getRegionInfo(ClassId)->RegionBeg;
+  }
+
+  CompactPtrT compactPtr(uptr ClassId, uptr Ptr) {
+    DCHECK_LE(ClassId, SizeClassMap::LargestClassId);
+    return compactPtrInternal(getCompactPtrBaseByClassId(ClassId), Ptr);
+  }
+
+  void *decompactPtr(uptr ClassId, CompactPtrT CompactPtr) {
+    DCHECK_LE(ClassId, SizeClassMap::LargestClassId);
+    return reinterpret_cast<void *>(
+        decompactPtrInternal(getCompactPtrBaseByClassId(ClassId), CompactPtr));
+  }
+
   static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) {
     const RegionInfo *RegionInfoArray =
         reinterpret_cast<const RegionInfo *>(RegionInfoData);
@@ -265,14 +285,14 @@
   struct UnpaddedRegionInfo {
     HybridMutex Mutex;
     SinglyLinkedList<TransferBatch> FreeList;
-    RegionStats Stats;
-    bool Exhausted;
-    u32 RandState;
     uptr RegionBeg;
+    RegionStats Stats;
+    u32 RandState;
     uptr MappedUser;    // Bytes mapped for user memory.
     uptr AllocatedUser; // Bytes allocated for user memory.
     MapPlatformData Data;
     ReleaseToOsInfo ReleaseInfo;
+    bool Exhausted;
   };
   struct RegionInfo : UnpaddedRegionInfo {
     char Padding[SCUDO_CACHE_LINE_SIZE -
@@ -294,6 +314,14 @@
     return PrimaryBase + (ClassId << Config::PrimaryRegionSizeLog);
   }
 
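+  // A compact pointer is the offset of a block from its (per-class) base,
+  // scaled down by CompactPtrScale; decompaction reverses the operation.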
+  static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) {
+    return static_cast<CompactPtrT>((Ptr - Base) >> CompactPtrScale);
+  }
+
+  static uptr decompactPtrInternal(uptr Base, CompactPtrT CompactPtr) {
+    return Base + (static_cast<uptr>(CompactPtr) << CompactPtrScale);
+  }
+
   NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId,
                                            RegionInfo *Region) {
     const uptr Size = getSizeByClassId(ClassId);
@@ -303,12 +331,12 @@
     const uptr MappedUser = Region->MappedUser;
     const uptr TotalUserBytes = Region->AllocatedUser + MaxCount * Size;
     // Map more space for blocks, if necessary.
-    if (UNLIKELY(TotalUserBytes > MappedUser)) {
+    if (TotalUserBytes > MappedUser) {
       // Do the mmap for the user memory.
-      const uptr UserMapSize =
+      const uptr MapSize =
           roundUpTo(TotalUserBytes - MappedUser, MapSizeIncrement);
       const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId);
-      if (RegionBase + MappedUser + UserMapSize > RegionSize) {
+      if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) {
         if (!Region->Exhausted) {
           Region->Exhausted = true;
           ScopedString Str(1024);
@@ -322,14 +350,15 @@
       }
       if (MappedUser == 0)
         Region->Data = Data;
-      if (!map(reinterpret_cast<void *>(RegionBeg + MappedUser), UserMapSize,
-               "scudo:primary",
-               MAP_ALLOWNOMEM | MAP_RESIZABLE |
-                   (useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG : 0),
-               &Region->Data))
+      if (UNLIKELY(!map(
+              reinterpret_cast<void *>(RegionBeg + MappedUser), MapSize,
+              "scudo:primary",
+              MAP_ALLOWNOMEM | MAP_RESIZABLE |
+                  (useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG : 0),
+              &Region->Data)))
         return nullptr;
-      Region->MappedUser += UserMapSize;
-      C->getStats().add(StatMapped, UserMapSize);
+      Region->MappedUser += MapSize;
+      C->getStats().add(StatMapped, MapSize);
     }
 
     const u32 NumberOfBlocks = Min(
@@ -339,17 +368,20 @@
 
     constexpr u32 ShuffleArraySize =
         MaxNumBatches * TransferBatch::MaxNumCached;
-    void *ShuffleArray[ShuffleArraySize];
+    CompactPtrT ShuffleArray[ShuffleArraySize];
     DCHECK_LE(NumberOfBlocks, ShuffleArraySize);
 
+    const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId);
     uptr P = RegionBeg + Region->AllocatedUser;
     for (u32 I = 0; I < NumberOfBlocks; I++, P += Size)
-      ShuffleArray[I] = reinterpret_cast<void *>(P);
+      ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P);
     // No need to shuffle the batches size class.
     if (ClassId != SizeClassMap::BatchClassId)
       shuffle(ShuffleArray, NumberOfBlocks, &Region->RandState);
     for (u32 I = 0; I < NumberOfBlocks;) {
-      TransferBatch *B = C->createBatch(ClassId, ShuffleArray[I]);
+      TransferBatch *B =
+          C->createBatch(ClassId, reinterpret_cast<void *>(decompactPtrInternal(
+                                      CompactPtrBase, ShuffleArray[I])));
       if (UNLIKELY(!B))
         return nullptr;
       const u32 N = Min(MaxCount, NumberOfBlocks - I);
@@ -409,7 +441,7 @@
     if (BlockSize < PageSize / 16U) {
       if (!Force && BytesPushed < Region->AllocatedUser / 16U)
         return 0;
-      // We want 8x% to 9x% free bytes (the larger the bock, the lower the %).
+      // We want 8x% to 9x% free bytes (the larger the block, the lower the %).
       if ((BytesInFreeList * 100U) / Region->AllocatedUser <
           (100U - 1U - BlockSize / 16U))
         return 0;
@@ -426,11 +458,14 @@
       }
     }
 
-    auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
     ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data);
-    releaseFreeMemoryToOS(Region->FreeList, Region->RegionBeg,
-                          Region->AllocatedUser, 1U, BlockSize, &Recorder,
-                          SkipRegion);
+    const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId);
+    auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) {
+      return decompactPtrInternal(CompactPtrBase, CompactPtr);
+    };
+    auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
+    releaseFreeMemoryToOS(Region->FreeList, Region->AllocatedUser, 1U,
+                          BlockSize, &Recorder, DecompactPtr, SkipRegion);
 
     if (Recorder.getReleasedRangesCount() > 0) {
       Region->ReleaseInfo.PushedBlocksAtLastRelease =
diff --git a/standalone/release.h b/standalone/release.h
index 5c11da2..a47ae2e 100644
--- a/standalone/release.h
+++ b/standalone/release.h
@@ -17,17 +17,19 @@
 
 class ReleaseRecorder {
 public:
-  ReleaseRecorder(uptr BaseAddress, MapPlatformData *Data = nullptr)
-      : BaseAddress(BaseAddress), Data(Data) {}
+  ReleaseRecorder(uptr Base, MapPlatformData *Data = nullptr)
+      : Base(Base), Data(Data) {}
 
   uptr getReleasedRangesCount() const { return ReleasedRangesCount; }
 
   uptr getReleasedBytes() const { return ReleasedBytes; }
 
+  uptr getBase() const { return Base; }
+
   // Releases [From, To) range of pages back to OS.
   void releasePageRangeToOS(uptr From, uptr To) {
     const uptr Size = To - From;
-    releasePagesToOS(BaseAddress, From, Size, Data);
+    releasePagesToOS(Base, From, Size, Data);
     ReleasedRangesCount++;
     ReleasedBytes += Size;
   }
@@ -35,7 +37,7 @@
 private:
   uptr ReleasedRangesCount = 0;
   uptr ReleasedBytes = 0;
-  uptr BaseAddress = 0;
+  uptr Base = 0;
   MapPlatformData *Data = nullptr;
 };
 
@@ -179,11 +181,13 @@
   uptr CurrentRangeStatePage = 0;
 };
 
-template <class TransferBatchT, class ReleaseRecorderT, typename SkipRegionT>
+template <class TransferBatchT, class ReleaseRecorderT, typename DecompactPtrT,
+          typename SkipRegionT>
 NOINLINE void
-releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
+releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList,
                       uptr RegionSize, uptr NumberOfRegions, uptr BlockSize,
-                      ReleaseRecorderT *Recorder, SkipRegionT SkipRegion) {
+                      ReleaseRecorderT *Recorder, DecompactPtrT DecompactPtr,
+                      SkipRegionT SkipRegion) {
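+  // DecompactPtr turns a FreeList entry back into a full address; the base
+  // to subtract now comes from the Recorder rather than a Base parameter.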
   const uptr PageSize = getPageSizeCached();
 
   // Figure out the number of chunks per page and whether we can take a fast
@@ -236,9 +240,8 @@
     // Each chunk affects one page only.
     for (const auto &It : FreeList) {
       for (u32 I = 0; I < It.getCount(); I++) {
-        const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
-        // This takes care of P < Base and P >= Base + RoundedSize.
-        if (UNLIKELY(P >= RoundedSize))
+        const uptr P = DecompactPtr(It.get(I)) - Recorder->getBase();
+        if (P >= RoundedSize)
           continue;
         const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
         const uptr PInRegion = P - RegionIndex * RegionSize;
@@ -251,9 +254,8 @@
     const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize;
     for (const auto &It : FreeList) {
       for (u32 I = 0; I < It.getCount(); I++) {
-        const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
-        // This takes care of P < Base and P >= Base + RoundedSize.
-        if (UNLIKELY(P >= RoundedSize))
+        const uptr P = DecompactPtr(It.get(I)) - Recorder->getBase();
+        if (P >= RoundedSize)
           continue;
         const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
         uptr PInRegion = P - RegionIndex * RegionSize;
diff --git a/standalone/size_class_map.h b/standalone/size_class_map.h
index 5ed8e28..7b8bf74 100644
--- a/standalone/size_class_map.h
+++ b/standalone/size_class_map.h
@@ -168,13 +168,24 @@
   }
 };
 
+struct DefaultSizeClassConfig {
+  static const uptr NumBits = 3;
+  static const uptr MinSizeLog = 5;
+  static const uptr MidSizeLog = 8;
+  static const uptr MaxSizeLog = 17;
+  static const u32 MaxNumCachedHint = 10;
+  static const uptr MaxBytesCachedLog = 10;
+};
+
+typedef FixedSizeClassMap<DefaultSizeClassConfig> DefaultSizeClassMap;
+
 struct AndroidSizeClassConfig {
 #if SCUDO_WORDSIZE == 64U
   static const uptr NumBits = 7;
   static const uptr MinSizeLog = 4;
   static const uptr MidSizeLog = 6;
   static const uptr MaxSizeLog = 16;
-  static const u32 MaxNumCachedHint = 14;
+  static const u32 MaxNumCachedHint = 13;
   static const uptr MaxBytesCachedLog = 13;
 
   static constexpr u32 Classes[] = {
@@ -208,31 +219,20 @@
 
 typedef TableSizeClassMap<AndroidSizeClassConfig> AndroidSizeClassMap;
 
-struct DefaultSizeClassConfig {
-  static const uptr NumBits = 3;
-  static const uptr MinSizeLog = 5;
-  static const uptr MidSizeLog = 8;
-  static const uptr MaxSizeLog = 17;
-  static const u32 MaxNumCachedHint = 8;
-  static const uptr MaxBytesCachedLog = 10;
-};
-
-typedef FixedSizeClassMap<DefaultSizeClassConfig> DefaultSizeClassMap;
-
 struct SvelteSizeClassConfig {
 #if SCUDO_WORDSIZE == 64U
   static const uptr NumBits = 4;
   static const uptr MinSizeLog = 4;
   static const uptr MidSizeLog = 8;
   static const uptr MaxSizeLog = 14;
-  static const u32 MaxNumCachedHint = 4;
+  static const u32 MaxNumCachedHint = 13;
   static const uptr MaxBytesCachedLog = 10;
 #else
   static const uptr NumBits = 4;
   static const uptr MinSizeLog = 3;
   static const uptr MidSizeLog = 7;
   static const uptr MaxSizeLog = 14;
-  static const u32 MaxNumCachedHint = 5;
+  static const u32 MaxNumCachedHint = 14;
   static const uptr MaxBytesCachedLog = 10;
 #endif
 };
diff --git a/standalone/tests/combined_test.cpp b/standalone/tests/combined_test.cpp
index d1bdd27..a6c6d82 100644
--- a/standalone/tests/combined_test.cpp
+++ b/standalone/tests/combined_test.cpp
@@ -399,6 +399,8 @@
   static const scudo::uptr PrimaryRegionSizeLog = DeathRegionSizeLog;
   static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
   static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
+  typedef scudo::uptr PrimaryCompactPtrT;
+  static const scudo::uptr PrimaryCompactPtrScale = 0;
 
   typedef scudo::MapAllocatorNoCache SecondaryCache;
   template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U, 1U>;
diff --git a/standalone/tests/primary_test.cpp b/standalone/tests/primary_test.cpp
index eed6431..38bf671 100644
--- a/standalone/tests/primary_test.cpp
+++ b/standalone/tests/primary_test.cpp
@@ -58,6 +58,8 @@
   static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
   static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
   static const bool MaySupportMemoryTagging = false;
+  typedef scudo::uptr PrimaryCompactPtrT;
+  static const scudo::uptr PrimaryCompactPtrScale = 0;
 };
 
 template <typename SizeClassMapT> struct TestConfig2 {
@@ -66,6 +68,8 @@
   static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
   static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
   static const bool MaySupportMemoryTagging = false;
+  typedef scudo::uptr PrimaryCompactPtrT;
+  static const scudo::uptr PrimaryCompactPtrScale = 0;
 };
 
 template <typename SizeClassMapT> struct TestConfig3 {
@@ -74,6 +78,8 @@
   static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
   static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
   static const bool MaySupportMemoryTagging = true;
+  typedef scudo::uptr PrimaryCompactPtrT;
+  static const scudo::uptr PrimaryCompactPtrScale = 0;
 };
 
 TEST(ScudoPrimaryTest, BasicPrimary) {
@@ -91,6 +97,8 @@
   static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
   static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
   static const bool MaySupportMemoryTagging = false;
+  typedef scudo::uptr PrimaryCompactPtrT;
+  static const scudo::uptr PrimaryCompactPtrScale = 0;
 };
 
 // The 64-bit SizeClassAllocator can be easily OOM'd with small region sizes.
@@ -115,7 +123,7 @@
       break;
     }
     for (scudo::u32 J = 0; J < B->getCount(); J++)
-      memset(B->get(J), 'B', Size);
+      memset(Allocator.decompactPtr(ClassId, B->get(J)), 'B', Size);
     Batches.push_back(B);
   }
   while (!Batches.empty()) {
diff --git a/standalone/tests/release_test.cpp b/standalone/tests/release_test.cpp
index 9e991a7..04c0289 100644
--- a/standalone/tests/release_test.cpp
+++ b/standalone/tests/release_test.cpp
@@ -124,6 +124,8 @@
     for (scudo::uptr I = From; I < To; I += PageSize)
       ReportedPages.insert(I);
   }
+
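+  // The test's free list stores plain addresses starting at 0, so the base
+  // to subtract is simply 0.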
+  scudo::uptr getBase() const { return 0; }
 };
 
 // Simplified version of a TransferBatch.
@@ -191,9 +193,10 @@
 
     // Release the memory.
     auto SkipRegion = [](UNUSED scudo::uptr RegionIndex) { return false; };
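+    // Blocks are stored uncompacted in this test, so decompaction is the
+    // identity.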
+    auto DecompactPtr = [](scudo::uptr P) { return P; };
     ReleasedPagesRecorder Recorder;
-    releaseFreeMemoryToOS(FreeList, 0, MaxBlocks * BlockSize, 1U, BlockSize,
-                          &Recorder, SkipRegion);
+    releaseFreeMemoryToOS(FreeList, MaxBlocks * BlockSize, 1U, BlockSize,
+                          &Recorder, DecompactPtr, SkipRegion);
 
     // Verify that there are no released pages touched by used chunks and all
     // ranges of free chunks big enough to contain the entire memory pages had