[scudo] 32-bit and hardware agnostic support

Summary:
This update introduces i386 support for the Scudo Hardened Allocator, and
offers software alternatives for functions that used to require hardware
specific instruction sets. This should make porting to new architectures
easier.

Among the changes:
- The chunk header has been changed to accommodate the size limitations
  encountered on 32-bit architectures. We now fit everything in 64 bits. This
  was achieved by storing the amount of unused bytes in an allocation rather
  than the size itself, as one can be deduced from the other with the help
  of the GetActuallyAllocatedSize function. As it turns out, this header can
  be used for both 64 and 32 bit, and as such we dropped the requirement for
  the 128-bit compare and exchange instruction support (cmpxchg16b).
- Add 32-bit support for the checksum and the PRNG functions: if the SSE 4.2
  instruction set is supported, use the 32-bit CRC32 instruction, and in the
  XorShift128, use a 32-bit based state instead of 64-bit.
- Add software support for CRC32: if SSE 4.2 is not supported, fall back on a
  software implementation.
- Modify tests that were not 32-bit compliant, and expand them to cover more
  allocation and alignment sizes. The random shuffle test has been deactivated
  for linux-i386 & linux-i686 as the 32-bit sanitizer allocator doesn't
  currently randomize chunks.

Reviewers: alekseyshl, kcc

Subscribers: filcab, llvm-commits, tberghammer, danalbert, srhines, mgorny, modocache

Differential Revision: https://reviews.llvm.org/D26358

llvm-svn: 288255
diff --git a/compiler-rt/lib/scudo/scudo_allocator.h b/compiler-rt/lib/scudo/scudo_allocator.h
index d2450aa..484b7ea 100644
--- a/compiler-rt/lib/scudo/scudo_allocator.h
+++ b/compiler-rt/lib/scudo/scudo_allocator.h
@@ -14,10 +14,6 @@
 #ifndef SCUDO_ALLOCATOR_H_
 #define SCUDO_ALLOCATOR_H_
 
-#ifndef __x86_64__
-# error "The Scudo hardened allocator currently only supports x86_64."
-#endif
-
 #include "scudo_flags.h"
 
 #include "sanitizer_common/sanitizer_allocator.h"
@@ -39,57 +35,38 @@
   ChunkQuarantine = 2
 };
 
-#if SANITIZER_WORDSIZE == 64
-// Our header requires 128 bits of storage on 64-bit platforms, which fits
-// nicely with the alignment requirements. Having the offset saves us from
+// Our header requires 64 bits of storage. Having the offset saves us from
 // using functions such as GetBlockBegin, that is fairly costly. Our first
 // implementation used the MetaData as well, which offers the advantage of
 // being stored away from the chunk itself, but accessing it was costly as
 // well. The header will be atomically loaded and stored using the 16-byte
 // primitives offered by the platform (likely requires cmpxchg16b support).
-typedef unsigned __int128 PackedHeader;
-struct UnpackedHeader {
-  u16  Checksum      : 16;
-  uptr RequestedSize : 40; // Needed for reallocation purposes.
-  u8   State         : 2;  // available, allocated, or quarantined
-  u8   AllocType     : 2;  // malloc, new, new[], or memalign
-  u8   Unused_0_     : 4;
-  uptr Offset        : 12; // Offset from the beginning of the backend
-                           // allocation to the beginning of the chunk itself,
-                           // in multiples of MinAlignment. See comment about
-                           // its maximum value and test in init().
-  u64  Unused_1_     : 36;
-  u16  Salt          : 16;
-};
-#elif SANITIZER_WORDSIZE == 32
-// On 32-bit platforms, our header requires 64 bits.
 typedef u64 PackedHeader;
 struct UnpackedHeader {
-  u16  Checksum      : 12;
-  uptr RequestedSize : 32; // Needed for reallocation purposes.
-  u8   State         : 2;  // available, allocated, or quarantined
-  u8   AllocType     : 2;  // malloc, new, new[], or memalign
-  uptr Offset        : 12; // Offset from the beginning of the backend
-                           // allocation to the beginning of the chunk itself,
-                           // in multiples of MinAlignment. See comment about
-                           // its maximum value and test in Allocator::init().
-  u16  Salt          : 4;
+  u64 Checksum    : 16;
+  u64 UnusedBytes : 24; // Needed for reallocation purposes.
+  u64 State       : 2;  // available, allocated, or quarantined
+  u64 AllocType   : 2;  // malloc, new, new[], or memalign
+  u64 Offset      : 12; // Offset from the beginning of the backend
+                        // allocation to the beginning of the chunk itself,
+                        // in multiples of MinAlignment. See comment about
+                        // its maximum value and test in init().
+  u64 Salt        : 8;
 };
-#else
-# error "Unsupported SANITIZER_WORDSIZE."
-#endif  // SANITIZER_WORDSIZE
 
 typedef std::atomic<PackedHeader> AtomicPackedHeader;
 COMPILER_CHECK(sizeof(UnpackedHeader) == sizeof(PackedHeader));
 
-const uptr ChunkHeaderSize = sizeof(PackedHeader);
-
 // Minimum alignment of 8 bytes for 32-bit, 16 for 64-bit
 const uptr MinAlignmentLog = FIRST_32_SECOND_64(3, 4);
 const uptr MaxAlignmentLog = 24; // 16 MB
 const uptr MinAlignment = 1 << MinAlignmentLog;
 const uptr MaxAlignment = 1 << MaxAlignmentLog;
 
+const uptr ChunkHeaderSize = sizeof(PackedHeader);
+const uptr AlignedChunkHeaderSize =
+    (ChunkHeaderSize + MinAlignment - 1) & ~(MinAlignment - 1);
+
 struct AllocatorOptions {
   u32 QuarantineSizeMb;
   u32 ThreadLocalQuarantineSizeKb;
@@ -120,6 +97,6 @@
 
 #include "scudo_allocator_secondary.h"
 
-} // namespace __scudo
+}  // namespace __scudo
 
 #endif  // SCUDO_ALLOCATOR_H_