Merge "Implement object lock and unlock entrypoints for x86-64"
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index b030bb4..0596d4f 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1018,8 +1018,8 @@
       vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment);
     }
   } else {
-    DCHECK_EQ(__builtin_popcount(core_spill_mask_), 0);
-    DCHECK_EQ(__builtin_popcount(fp_spill_mask_), 0);
+    DCHECK_EQ(POPCOUNT(core_spill_mask_), 0);
+    DCHECK_EQ(POPCOUNT(fp_spill_mask_), 0);
     DCHECK_EQ(core_vmap_table_.size(), 0u);
     DCHECK_EQ(fp_vmap_table_.size(), 0u);
     vmap_encoder.PushBackUnsigned(0u);  // Size is 0.
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index effc38e..5c839dd 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -1107,7 +1107,7 @@
   // The offset is off by 8 due to the way the ARM CPUs read PC.
   offset -= 8;
   CHECK_ALIGNED(offset, 4);
-  CHECK(IsInt(CountOneBits(kBranchOffsetMask), offset)) << offset;
+  CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset;
 
   // Properly preserve only the bits supported in the instruction.
   offset >>= 2;
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 45d3a97..9001f8a 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -123,7 +123,7 @@
 
 int32_t MipsAssembler::EncodeBranchOffset(int offset, int32_t inst, bool is_jump) {
   CHECK_ALIGNED(offset, 4);
-  CHECK(IsInt(CountOneBits(kBranchOffsetMask), offset)) << offset;
+  CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset;
 
   // Properly preserve only the bits supported in the instruction.
   offset >>= 2;
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 102e126..0e1b25e 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -44,8 +44,8 @@
   mirror::ArtMethod* method = fr.GetMethod();
   uint32_t core_spills = method->GetCoreSpillMask();
   uint32_t fp_core_spills = method->GetFpSpillMask();
-  size_t spill_count = __builtin_popcount(core_spills);
-  size_t fp_spill_count = __builtin_popcount(fp_core_spills);
+  size_t spill_count = POPCOUNT(core_spills);
+  size_t fp_spill_count = POPCOUNT(fp_core_spills);
   size_t frame_size = method->GetFrameSizeInBytes();
   if (spill_count > 0) {
     // Lowest number spill is farthest away, walk registers and fill into context
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 3bbec71..eddaa0b 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -233,9 +233,9 @@
 
   mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
   uint32_t spill_mask = method->GetCoreSpillMask();
-  uint32_t numcores = __builtin_popcount(spill_mask);
+  uint32_t numcores = POPCOUNT(spill_mask);
   uint32_t fp_spill_mask = method->GetFpSpillMask();
-  uint32_t numfps = __builtin_popcount(fp_spill_mask);
+  uint32_t numfps = POPCOUNT(fp_spill_mask);
   uint32_t spill_size = (numcores + numfps) * 4;
   LOG(DEBUG) << "spill size: " << spill_size;
   uint8_t* prevframe = prevsp + spill_size;
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index c96ff60..0890fa9 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -47,8 +47,8 @@
   mirror::ArtMethod* method = fr.GetMethod();
   uint32_t core_spills = method->GetCoreSpillMask();
   uint32_t fp_core_spills = method->GetFpSpillMask();
-  size_t spill_count = __builtin_popcount(core_spills);
-  size_t fp_spill_count = __builtin_popcount(fp_core_spills);
+  size_t spill_count = POPCOUNT(core_spills);
+  size_t fp_spill_count = POPCOUNT(fp_core_spills);
   size_t frame_size = method->GetFrameSizeInBytes();
 
   if (spill_count > 0) {
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index b957708..0950e71 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -43,8 +43,8 @@
   mirror::ArtMethod* method = fr.GetMethod();
   uint32_t core_spills = method->GetCoreSpillMask();
   uint32_t fp_core_spills = method->GetFpSpillMask();
-  size_t spill_count = __builtin_popcount(core_spills);
-  size_t fp_spill_count = __builtin_popcount(fp_core_spills);
+  size_t spill_count = POPCOUNT(core_spills);
+  size_t fp_spill_count = POPCOUNT(fp_core_spills);
   size_t frame_size = method->GetFrameSizeInBytes();
   if (spill_count > 0) {
     // Lowest number spill is farthest away, walk registers and fill into context.
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 5cf3001..c68d76a 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -38,7 +38,7 @@
 void X86Context::FillCalleeSaves(const StackVisitor& fr) {
   mirror::ArtMethod* method = fr.GetMethod();
   uint32_t core_spills = method->GetCoreSpillMask();
-  size_t spill_count = __builtin_popcount(core_spills);
+  size_t spill_count = POPCOUNT(core_spills);
   DCHECK_EQ(method->GetFpSpillMask(), 0u);
   size_t frame_size = method->GetFrameSizeInBytes();
   if (spill_count > 0) {
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 3f1f86d..29a7065 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -42,8 +42,8 @@
   mirror::ArtMethod* method = fr.GetMethod();
   uint32_t core_spills = method->GetCoreSpillMask();
   uint32_t fp_core_spills = method->GetFpSpillMask();
-  size_t spill_count = __builtin_popcount(core_spills);
-  size_t fp_spill_count = __builtin_popcount(fp_core_spills);
+  size_t spill_count = POPCOUNT(core_spills);
+  size_t fp_spill_count = POPCOUNT(fp_core_spills);
   size_t frame_size = method->GetFrameSizeInBytes();
   if (spill_count > 0) {
     // Lowest number spill is farthest away, walk registers and fill into context.
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index 12c0352..3df5101 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -201,7 +201,7 @@
 uint32_t BitVector::NumSetBits() const {
   uint32_t count = 0;
   for (uint32_t word = 0; word < storage_size_; word++) {
-    count += __builtin_popcount(storage_[word]);
+    count += POPCOUNT(storage_[word]);
   }
   return count;
 }
@@ -331,10 +331,10 @@
 
   uint32_t count = 0u;
   for (uint32_t word = 0u; word < word_end; word++) {
-    count += __builtin_popcount(storage[word]);
+    count += POPCOUNT(storage[word]);
   }
   if (partial_word_bits != 0u) {
-    count += __builtin_popcount(storage[word_end] & ~(0xffffffffu << partial_word_bits));
+    count += POPCOUNT(storage[word_end] & ~(0xffffffffu << partial_word_bits));
   }
   return count;
 }
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index c9e3c11..703229c 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -663,12 +663,8 @@
                               actual_image_oat_offset);
     return nullptr;
   }
-  // TODO: this registers the oat file now as we may use the oat_dex_file later and we want the
-  //       intern behavior of RegisterOatFile. However, if we take an early return we could remove
-  //       the oat file.
-  const OatFile* opened_oat_file = RegisterOatFile(oat_file.release());
-  const OatFile::OatDexFile* oat_dex_file = opened_oat_file->GetOatDexFile(dex_location,
-                                                                           &dex_location_checksum);
+  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location,
+                                                                    &dex_location_checksum);
   if (oat_dex_file == nullptr) {
     *error_msg = StringPrintf("Failed to find oat file at '%s' containing '%s'", oat_location,
                               dex_location);
@@ -682,7 +678,11 @@
                               actual_dex_checksum);
     return nullptr;
   }
-  return oat_dex_file->OpenDexFile(error_msg);
+  const DexFile* dex_file = oat_dex_file->OpenDexFile(error_msg);
+  if (dex_file != nullptr) {
+    RegisterOatFile(oat_file.release());
+  }
+  return dex_file;
 }
 
 class ScopedFlock {
@@ -773,16 +773,15 @@
     error_msgs->push_back(error_msg);
     return nullptr;
   }
-  const OatFile* oat_file = OatFile::Open(oat_location, oat_location, NULL,
-                                          !Runtime::Current()->IsCompiler(),
-                                          &error_msg);
-  if (oat_file == nullptr) {
+  UniquePtr<OatFile> oat_file(OatFile::Open(oat_location, oat_location, NULL,
+                                            !Runtime::Current()->IsCompiler(),
+                                            &error_msg));
+  if (oat_file.get() == nullptr) {
     compound_msg = StringPrintf("\nFailed to open generated oat file '%s': %s",
                                 oat_location, error_msg.c_str());
     error_msgs->push_back(compound_msg);
     return nullptr;
   }
-  oat_file = RegisterOatFile(oat_file);
   const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location,
                                                                     &dex_location_checksum);
   if (oat_dex_file == nullptr) {
@@ -797,6 +796,7 @@
           << "dex_location=" << dex_location << " oat_location=" << oat_location << std::hex
           << " dex_location_checksum=" << dex_location_checksum
           << " DexFile::GetLocationChecksum()=" << result->GetLocationChecksum();
+  RegisterOatFile(oat_file.release());
   return result;
 }
 
@@ -857,32 +857,33 @@
     return nullptr;
   }
   *open_failed = false;
+  const DexFile* dex_file = nullptr;
   uint32_t dex_location_checksum;
   if (!DexFile::GetChecksum(dex_location, &dex_location_checksum, error_msg)) {
     // If no classes.dex found in dex_location, it has been stripped or is corrupt, assume oat is
     // up-to-date. This is the common case in user builds for jar's and apk's in the /system
     // directory.
-    const OatFile* opened_oat_file = oat_file.release();
-    opened_oat_file = RegisterOatFile(opened_oat_file);
-    const OatFile::OatDexFile* oat_dex_file = opened_oat_file->GetOatDexFile(dex_location, NULL);
+    const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location, NULL);
     if (oat_dex_file == nullptr) {
       *error_msg = StringPrintf("Dex checksum mismatch for location '%s' and failed to find oat "
                                 "dex file '%s': %s", oat_file_location.c_str(), dex_location,
                                 error_msg->c_str());
       return nullptr;
     }
-    return oat_dex_file->OpenDexFile(error_msg);
+    dex_file = oat_dex_file->OpenDexFile(error_msg);
+  } else {
+    bool verified = VerifyOatFileChecksums(oat_file.get(), dex_location, dex_location_checksum,
+                                           error_msg);
+    if (!verified) {
+      return nullptr;
+    }
+    dex_file = oat_file->GetOatDexFile(dex_location,
+                                       &dex_location_checksum)->OpenDexFile(error_msg);
   }
-
-  bool verified = VerifyOatFileChecksums(oat_file.get(), dex_location, dex_location_checksum,
-                                         error_msg);
-  if (!verified) {
-    return nullptr;
+  if (dex_file != nullptr) {
+    RegisterOatFile(oat_file.release());
   }
-  const OatFile* opened_oat_file = oat_file.release();
-  opened_oat_file = RegisterOatFile(opened_oat_file);
-  return opened_oat_file->GetOatDexFile(dex_location,
-                                        &dex_location_checksum)->OpenDexFile(error_msg);
+  return dex_file;
 }
 
 const DexFile* ClassLinker::FindDexFileInOatFileFromDexLocation(const char* dex_location,
diff --git a/runtime/gc/accounting/gc_allocator.h b/runtime/gc/accounting/gc_allocator.h
index 4fe9367..7dd7cca 100644
--- a/runtime/gc/accounting/gc_allocator.h
+++ b/runtime/gc/accounting/gc_allocator.h
@@ -73,7 +73,7 @@
 // GCAllocatorImpl<T> if kMeasureGCMemoryOverhead is true, std::allocator<T> otherwise.
 template <typename T>
 class GcAllocator : public TypeStaticIf<kMeasureGcMemoryOverhead, GcAllocatorImpl<T>,
-                                        std::allocator<T> >::value {
+                                        std::allocator<T> >::type {
 };
 
 }  // namespace accounting
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 64a849b..662303e 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -257,7 +257,7 @@
     for (Class* c = this; c != NULL; c = c->GetSuperClass()) {
       count += c->NumReferenceInstanceFieldsDuringLinking();
     }
-    CHECK_EQ((size_t)__builtin_popcount(new_reference_offsets), count);
+    CHECK_EQ((size_t)POPCOUNT(new_reference_offsets), count);
   }
   // Not called within a transaction.
   SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, reference_instance_offsets_),
@@ -268,7 +268,7 @@
   if (new_reference_offsets != CLASS_WALK_SUPER) {
     // Sanity check that the number of bits set in the reference offset bitmap
     // agrees with the number of references
-    CHECK_EQ((size_t)__builtin_popcount(new_reference_offsets),
+    CHECK_EQ((size_t)POPCOUNT(new_reference_offsets),
              NumReferenceStaticFieldsDuringLinking());
   }
   // Not called within a transaction.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 20df78e..fbc0460 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1010,8 +1010,8 @@
                              (1 << art::arm::S27) | (1 << art::arm::S28) | (1 << art::arm::S29) |
                              (1 << art::arm::S30) | (1 << art::arm::S31);
     uint32_t fp_spills = type == kSaveAll ? fp_all_spills : 0;
-    size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
-                                 __builtin_popcount(fp_spills) /* fprs */ +
+    size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
+                                 POPCOUNT(fp_spills) /* fprs */ +
                                  1 /* Method* */) * kArmPointerSize, kStackAlignment);
     method->SetFrameSizeInBytes(frame_size);
     method->SetCoreSpillMask(core_spills);
@@ -1024,7 +1024,7 @@
     uint32_t all_spills = (1 << art::mips::S0) | (1 << art::mips::S1);
     uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
                            (type == kSaveAll ? all_spills : 0) | (1 << art::mips::RA);
-    size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
+    size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
                                 (type == kRefsAndArgs ? 0 : 3) + 1 /* Method* */) *
                                 kMipsPointerSize, kStackAlignment);
     method->SetFrameSizeInBytes(frame_size);
@@ -1035,7 +1035,7 @@
     uint32_t arg_spills = (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
     uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
                          (1 << art::x86::kNumberOfCpuRegisters);  // fake return address callee save
-    size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
+    size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
                                  1 /* Method* */) * kX86PointerSize, kStackAlignment);
     method->SetFrameSizeInBytes(frame_size);
     method->SetCoreSpillMask(core_spills);
@@ -1054,8 +1054,8 @@
         (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
         (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7);
     uint32_t fp_spills = (type == kRefsAndArgs ? fp_arg_spills : 0);
-    size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
-                                 __builtin_popcount(fp_spills) /* fprs */ +
+    size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
+                                 POPCOUNT(fp_spills) /* fprs */ +
                                  1 /* Method* */) * kX86_64PointerSize, kStackAlignment);
     method->SetFrameSizeInBytes(frame_size);
     method->SetCoreSpillMask(core_spills);
@@ -1094,8 +1094,8 @@
                           (1 << art::arm64::D31);
       uint32_t fp_spills = fp_ref_spills | (type == kRefsAndArgs ? fp_arg_spills: 0)
                           | (type == kSaveAll ? fp_all_spills : 0);
-      size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
-                                   __builtin_popcount(fp_spills) /* fprs */ +
+      size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
+                                   POPCOUNT(fp_spills) /* fprs */ +
                                    1 /* Method* */) * kArm64PointerSize, kStackAlignment);
       method->SetFrameSizeInBytes(frame_size);
       method->SetCoreSpillMask(core_spills);
diff --git a/runtime/stack.h b/runtime/stack.h
index afc4f25..73a823a 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -25,6 +25,7 @@
 #include "instruction_set.h"
 #include "mirror/object.h"
 #include "mirror/object_reference.h"
+#include "utils.h"
 #include "verify_object.h"
 
 #include <stdint.h>
@@ -638,8 +639,8 @@
                            size_t frame_size, int reg, InstructionSet isa) {
     DCHECK_EQ(frame_size & (kStackAlignment - 1), 0U);
     DCHECK_NE(reg, static_cast<int>(kVRegInvalid));
-    int spill_size = __builtin_popcount(core_spills) * GetBytesPerGprSpillLocation(isa)
-        + __builtin_popcount(fp_spills) * GetBytesPerFprSpillLocation(isa)
+    int spill_size = POPCOUNT(core_spills) * GetBytesPerGprSpillLocation(isa)
+        + POPCOUNT(fp_spills) * GetBytesPerFprSpillLocation(isa)
         + sizeof(uint32_t);  // Filler.
     int num_ins = code_item->ins_size_;
     int num_regs = code_item->registers_size_ - num_ins;
diff --git a/runtime/utils.h b/runtime/utils.h
index 4b2f230..14a532e 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -47,7 +47,7 @@
 };
 
 template<typename T>
-static inline bool IsPowerOfTwo(T x) {
+static constexpr bool IsPowerOfTwo(T x) {
   return (x & (x - 1)) == 0;
 }
 
@@ -115,39 +115,46 @@
 }
 
 // A static if which determines whether to return type A or B based on the condition boolean.
-template <const bool condition, typename A, typename B>
+template <bool condition, typename A, typename B>
 struct TypeStaticIf {
-  typedef A value;
+  typedef A type;
 };
 
 // Specialization to handle the false case.
 template <typename A, typename B>
 struct TypeStaticIf<false, A,  B> {
-  typedef B value;
+  typedef B type;
+};
+
+// Type identity; TypeIdentity<T>::type makes a parameter non-deduced so T comes from other args.
+template <typename T>
+struct TypeIdentity {
+  typedef T type;
 };
 
 // For rounding integers.
 template<typename T>
-static inline T RoundDown(T x, int n) {
-  DCHECK(IsPowerOfTwo(n));
-  return (x & -n);
+static constexpr T RoundDown(T x, typename TypeIdentity<T>::type n) {
+  return
+      // DCHECK(IsPowerOfTwo(n)) in a form acceptable in a constexpr function:
+      (kIsDebugBuild && !IsPowerOfTwo(n)) ? (LOG(FATAL) << n << " isn't a power of 2", T(0))
+      : (x & -n);
 }
 
 template<typename T>
-static inline T RoundUp(T x, int n) {
+static constexpr T RoundUp(T x, typename TypeIdentity<T>::type n) {
   return RoundDown(x + n - 1, n);
 }
 
 // For aligning pointers.
 template<typename T>
-static inline T* AlignDown(T* x, int n) {
-  CHECK(IsPowerOfTwo(n));
-  return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(x) & -static_cast<uintptr_t>(n));
+static inline T* AlignDown(T* x, uintptr_t n) {
+  return reinterpret_cast<T*>(RoundDown(reinterpret_cast<uintptr_t>(x), n));
 }
 
 template<typename T>
-static inline T* AlignUp(T* x, int n) {
-  return AlignDown(reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(x) + static_cast<uintptr_t>(n - 1)), n);
+static inline T* AlignUp(T* x, uintptr_t n) {
+  return reinterpret_cast<T*>(RoundUp(reinterpret_cast<uintptr_t>(x), n));
 }
 
 // Implementation is from "Hacker's Delight" by Henry S. Warren, Jr.,
@@ -162,33 +169,25 @@
   return x + 1;
 }
 
-// Implementation is from "Hacker's Delight" by Henry S. Warren, Jr.,
-// figure 5-2, page 66, where the function is called pop.
-static inline int CountOneBits(uint32_t x) {
-  x = x - ((x >> 1) & 0x55555555);
-  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
-  x = (x + (x >> 4)) & 0x0F0F0F0F;
-  x = x + (x >> 8);
-  x = x + (x >> 16);
-  return static_cast<int>(x & 0x0000003F);
+template<typename T>
+static constexpr int CLZ(T x) {
+  return (sizeof(T) == sizeof(uint32_t))
+      ? __builtin_clz(x)
+      : __builtin_clzll(x);
 }
 
 template<typename T>
-static inline int CLZ(T x) {
-  if (sizeof(T) == sizeof(uint32_t)) {
-    return __builtin_clz(x);
-  } else {
-    return __builtin_clzll(x);
-  }
+static constexpr int CTZ(T x) {
+  return (sizeof(T) == sizeof(uint32_t))
+      ? __builtin_ctz(x)
+      : __builtin_ctzll(x);
 }
 
 template<typename T>
-static inline int CTZ(T x) {
-  if (sizeof(T) == sizeof(uint32_t)) {
-    return __builtin_ctz(x);
-  } else {
-    return __builtin_ctzll(x);
-  }
+static constexpr int POPCOUNT(T x) {
+  return (sizeof(T) == sizeof(uint32_t))
+      ? __builtin_popcount(x)
+      : __builtin_popcountll(x);
 }
 
 static inline uint32_t PointerToLowMemUInt32(const void* p) {
diff --git a/runtime/vmap_table.h b/runtime/vmap_table.h
index 2fbaebe..9821753 100644
--- a/runtime/vmap_table.h
+++ b/runtime/vmap_table.h
@@ -99,7 +99,7 @@
       }
       matches++;
     }
-    CHECK_LT(vmap_offset - matches, static_cast<uint32_t>(__builtin_popcount(spill_mask)));
+    CHECK_LT(vmap_offset - matches, static_cast<uint32_t>(POPCOUNT(spill_mask)));
     uint32_t spill_shifts = 0;
     while (matches != (vmap_offset + 1)) {
       DCHECK_NE(spill_mask, 0u);