Subzero. Performance tweaks.

Introduces Ice::SmallBitVector.

Modifies ConstantRelocatables so that known offsets (i.e.,
not offsets to the code stream) do not require GlobalContext
allocations.

Modifies Cfg-local containers to use the CfgLocalAllocator.

BUG=
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1738443002 .
diff --git a/Makefile.standalone b/Makefile.standalone
index 12e047f..bd18f32 100644
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -269,6 +269,7 @@
   IceIntrinsics.cpp \
   IceLiveness.cpp \
   IceLoopAnalyzer.cpp \
+  IceMemory.cpp \
   IceOperand.cpp \
   IceRegAlloc.cpp \
   IceRNG.cpp \
@@ -450,8 +451,7 @@
           -i x8664,native,sse4.1,test_vector_ops \
           -i x8664,sandbox,sse4.1,Om1 \
           -i arm32 \
-          -e arm32,sandbox,hwdiv-arm \
-          -e arm32,neon,test_vector_ops
+          -e arm32,sandbox,hwdiv-arm
 	PNACL_BIN_PATH=$(PNACL_BIN_PATH) \
 	$(LLVM_SRC_PATH)/utils/lit/lit.py -sv $(CHECK_XTEST_TESTS)
 endif
diff --git a/src/IceAssembler.h b/src/IceAssembler.h
index b25d9b4..6e5ed15 100644
--- a/src/IceAssembler.h
+++ b/src/IceAssembler.h
@@ -30,6 +30,8 @@
 #include "IceFixups.h"
 #include "IceUtils.h"
 
+#include "llvm/Support/Allocator.h"
+
 namespace Ice {
 
 class Assembler;
@@ -338,7 +340,10 @@
 private:
   const AssemblerKind Kind;
 
-  ArenaAllocator<32 * 1024> Allocator;
+  using AssemblerAllocator =
+      llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator, /*SlabSize=*/32 * 1024>;
+  AssemblerAllocator Allocator;
+
   /// FunctionName and IsInternal are transferred from the original Cfg object,
   /// since the Cfg object may be deleted by the time the assembler buffer is
   /// emitted.
diff --git a/src/IceBitVector.h b/src/IceBitVector.h
new file mode 100644
index 0000000..9c99e8a
--- /dev/null
+++ b/src/IceBitVector.h
@@ -0,0 +1,245 @@
+//===- subzero/src/IceBitVector.h - Inline bit vector. ----------*- C++ -*-===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Defines and implements a bit vector with inline storage. It is a drop
+/// in replacement for llvm::SmallBitVector in subzero -- i.e., not all of
+/// llvm::SmallBitVector interface is implemented.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICEBITVECTOR_H
+#define SUBZERO_SRC_ICEBITVECTOR_H
+
+#include "IceDefs.h"
+#include "IceOperand.h"
+
+#include "llvm/Support/MathExtras.h"
+
+#include <algorithm>
+#include <climits>
+#include <memory>
+#include <type_traits>
+
+namespace Ice {
+class SmallBitVector {
+public:
+  using ElementType = uint64_t;
+  static constexpr SizeT BitIndexSize = 6; // log2(NumBitsPerPos);
+  static constexpr SizeT NumBitsPerPos = sizeof(ElementType) * CHAR_BIT;
+  static_assert(1 << BitIndexSize == NumBitsPerPos, "Invalid BitIndexSize.");
+
+  SmallBitVector(const SmallBitVector &BV) { *this = BV; }
+
+  SmallBitVector &operator=(const SmallBitVector &BV) {
+    if (&BV != this) {
+      resize(BV.size());
+      memcpy(Bits, BV.Bits, sizeof(Bits));
+    }
+    return *this;
+  }
+
+  SmallBitVector() { reset(); }
+
+  explicit SmallBitVector(SizeT S) : SmallBitVector() {
+    assert(S <= MaxBits);
+    resize(S);
+  }
+
+  class Reference {
+    Reference() = delete;
+
+  public:
+    Reference(const Reference &) = default;
+    Reference &operator=(const Reference &Rhs) { return *this = (bool)Rhs; }
+    Reference &operator=(bool t) {
+      if (t) {
+        *Data |= _1 << Bit;
+      } else {
+        *Data &= ~(_1 << Bit);
+      }
+      return *this;
+    }
+    operator bool() const { return (*Data & (_1 << Bit)) != 0; }
+
+  private:
+    friend class SmallBitVector;
+    Reference(ElementType *D, SizeT B) : Data(D), Bit(B) {
+      assert(B < NumBitsPerPos);
+    }
+
+    ElementType *const Data;
+    const SizeT Bit;
+  };
+
+  Reference operator[](unsigned Idx) {
+    assert(Idx < size());
+    return Reference(Bits + (Idx >> BitIndexSize),
+                     Idx & ((_1 << BitIndexSize) - 1));
+  }
+
+  bool operator[](unsigned Idx) const {
+    assert(Idx < size());
+    return Bits[Idx >> BitIndexSize] &
+           (_1 << (Idx & ((_1 << BitIndexSize) - 1)));
+  }
+
+  int find_first() const { return find_first<0>(); }
+
+  int find_next(unsigned Prev) const { return find_next<0>(Prev); }
+
+  bool any() const {
+    for (SizeT i = 0; i < BitsElements; ++i) {
+      if (Bits[i]) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  SizeT size() const { return Size; }
+
+  void resize(SizeT Size) {
+    assert(Size <= MaxBits);
+    this->Size = Size;
+  }
+
+  void reserve(SizeT Size) {
+    assert(Size <= MaxBits);
+    (void)Size;
+  }
+
+  void set(unsigned Idx) { (*this)[Idx] = true; }
+
+  void set() {
+    for (SizeT ii = 0; ii < size(); ++ii) {
+      (*this)[ii] = true;
+    }
+  }
+
+  SizeT count() const {
+    SizeT Count = 0;
+    for (SizeT i = 0; i < BitsElements; ++i) {
+      Count += llvm::countPopulation(Bits[i]);
+    }
+    return Count;
+  }
+
+  SmallBitVector operator&(const SmallBitVector &Rhs) const {
+    assert(size() == Rhs.size());
+    SmallBitVector Ret(std::max(size(), Rhs.size()));
+    for (SizeT i = 0; i < BitsElements; ++i) {
+      Ret.Bits[i] = Bits[i] & Rhs.Bits[i];
+    }
+    return Ret;
+  }
+
+  SmallBitVector operator~() const {
+    SmallBitVector Ret = *this;
+    Ret.invert<0>();
+    return Ret;
+  }
+
+  SmallBitVector &operator|=(const SmallBitVector &Rhs) {
+    assert(size() == Rhs.size());
+    resize(std::max(size(), Rhs.size()));
+    for (SizeT i = 0; i < BitsElements; ++i) {
+      Bits[i] |= Rhs.Bits[i];
+    }
+    return *this;
+  }
+
+  SmallBitVector operator|(const SmallBitVector &Rhs) const {
+    assert(size() == Rhs.size());
+    SmallBitVector Ret(std::max(size(), Rhs.size()));
+    for (SizeT i = 0; i < BitsElements; ++i) {
+      Ret.Bits[i] = Bits[i] | Rhs.Bits[i];
+    }
+    return Ret;
+  }
+
+  void reset() { memset(Bits, 0, sizeof(Bits)); }
+
+  void reset(const SmallBitVector &Mask) {
+    for (const auto V : RegNumBVIter(Mask)) {
+      (*this)[unsigned(V)] = false;
+    }
+  }
+
+private:
+  // _1 is the constant 1 of type ElementType.
+  static constexpr ElementType _1 = ElementType(1);
+
+  static constexpr SizeT BitsElements = 2;
+  ElementType Bits[BitsElements];
+
+  // MaxBits is defined here because it needs Bits to be defined.
+  static constexpr SizeT MaxBits = sizeof(Bits) * CHAR_BIT;
+  static_assert(sizeof(Bits) == 16, "Bits must be 16 bytes wide.");
+  SizeT Size = 0;
+
+  template <SizeT Pos>
+  typename std::enable_if<Pos == sizeof(Bits) / sizeof(Bits[0]), int>::type
+  find_first() const {
+    return -1;
+  }
+
+  template <SizeT Pos>
+      typename std::enable_if <
+      Pos<sizeof(Bits) / sizeof(Bits[0]), int>::type find_first() const {
+    if (Bits[Pos] != 0) {
+      return NumBitsPerPos * Pos + llvm::countTrailingZeros(Bits[Pos]);
+    }
+    return find_first<Pos + 1>();
+  }
+
+  template <SizeT Pos>
+  typename std::enable_if<Pos == sizeof(Bits) / sizeof(Bits[0]), int>::type
+  find_next(unsigned) const {
+    return -1;
+  }
+
+  template <SizeT Pos>
+      typename std::enable_if < Pos<sizeof(Bits) / sizeof(Bits[0]), int>::type
+                                find_next(unsigned Prev) const {
+    if (Prev + 1 < (Pos + 1) * NumBitsPerPos) {
+      const ElementType Mask =
+          (ElementType(1) << ((Prev + 1) - Pos * NumBitsPerPos)) - 1;
+      const ElementType B = Bits[Pos] & ~Mask;
+      if (B != 0) {
+        return NumBitsPerPos * Pos + llvm::countTrailingZeros(B);
+      }
+      Prev = (1 + Pos) * NumBitsPerPos - 1;
+    }
+    return find_next<Pos + 1>(Prev);
+  }
+
+  template <SizeT Pos>
+  typename std::enable_if<Pos == sizeof(Bits) / sizeof(Bits[0]), void>::type
+  invert() {}
+
+  template <SizeT Pos>
+      typename std::enable_if <
+      Pos<sizeof(Bits) / sizeof(Bits[0]), void>::type invert() {
+    if (size() < Pos * NumBitsPerPos) {
+      Bits[Pos] = 0;
+    } else if ((Pos + 1) * NumBitsPerPos < size()) {
+      Bits[Pos] ^= ~ElementType(0);
+    } else {
+      const ElementType Mask =
+          (ElementType(1) << (size() - (Pos * NumBitsPerPos))) - 1;
+      Bits[Pos] ^= Mask;
+    }
+    invert<Pos + 1>();
+  }
+};
+
+} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICEBITVECTOR_H
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 6565667..2c80e0b 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -29,16 +29,10 @@
 
 namespace Ice {
 
-ICE_TLS_DEFINE_FIELD(const Cfg *, Cfg, CurrentCfg);
-
-ArenaAllocator<> *getCurrentCfgAllocator() {
-  return Cfg::getCurrentCfgAllocator();
-}
-
 Cfg::Cfg(GlobalContext *Ctx, uint32_t SequenceNumber)
     : Ctx(Ctx), SequenceNumber(SequenceNumber),
       VMask(Ctx->getFlags().getVerbose()), NextInstNumber(Inst::NumberInitial),
-      Allocator(new ArenaAllocator<>()), Live(nullptr),
+      Allocator(new ArenaAllocator()), Live(nullptr),
       Target(TargetLowering::createLowering(Ctx->getFlags().getTargetArch(),
                                             this)),
       VMetadata(new VariablesMetadata(this)),
@@ -53,7 +47,7 @@
   }
 }
 
-Cfg::~Cfg() { assert(ICE_TLS_GET_FIELD(CurrentCfg) == nullptr); }
+Cfg::~Cfg() { assert(CfgAllocatorTraits::current() == nullptr); }
 
 /// Create a string like "foo(i=123:b=9)" indicating the function name, number
 /// of high-level instructions, and number of basic blocks.  This string is only
diff --git a/src/IceCfg.h b/src/IceCfg.h
index 7bdc076..64a454c 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -36,16 +36,6 @@
                                      uint32_t SequenceNumber) {
     return std::unique_ptr<Cfg>(new Cfg(Ctx, SequenceNumber));
   }
-  /// Gets a pointer to the current thread's Cfg.
-  static const Cfg *getCurrentCfg() { return ICE_TLS_GET_FIELD(CurrentCfg); }
-  static void setCurrentCfg(const Cfg *Func) {
-    ICE_TLS_SET_FIELD(CurrentCfg, Func);
-  }
-  /// Gets a pointer to the current thread's Cfg's allocator.
-  static ArenaAllocator<> *getCurrentCfgAllocator() {
-    assert(ICE_TLS_GET_FIELD(CurrentCfg));
-    return ICE_TLS_GET_FIELD(CurrentCfg)->Allocator.get();
-  }
 
   GlobalContext *getContext() const { return Ctx; }
   uint32_t getSequenceNumber() const { return SequenceNumber; }
@@ -254,6 +244,8 @@
   }
 
 private:
+  friend class CfgAllocatorTraits; // for Allocator access.
+
   Cfg(GlobalContext *Ctx, uint32_t SequenceNumber);
 
   /// Adds a call to the ProfileSummary runtime function as the first
@@ -298,7 +290,7 @@
   VarList Variables;
   VarList Args;         /// subset of Variables, in argument order
   VarList ImplicitArgs; /// subset of Variables
-  std::unique_ptr<ArenaAllocator<>> Allocator;
+  std::unique_ptr<ArenaAllocator> Allocator;
   std::unique_ptr<Liveness> Live;
   std::unique_ptr<TargetLowering> Target;
   std::unique_ptr<VariablesMetadata> VMetadata;
@@ -313,13 +305,8 @@
   /// should be called to avoid spurious validation failures.
   const CfgNode *CurrentNode = nullptr;
 
-  /// Maintain a pointer in TLS to the current Cfg being translated. This is
-  /// primarily for accessing its allocator statelessly, but other uses are
-  /// possible.
-  ICE_TLS_DECLARE_FIELD(const Cfg *, CurrentCfg);
-
 public:
-  static void TlsInit() { ICE_TLS_INIT_FIELD(CurrentCfg); }
+  static void TlsInit() { CfgAllocatorTraits::init(); }
 };
 
 template <> Variable *Cfg::makeVariable<Variable>(Type Ty);
diff --git a/src/IceConverter.cpp b/src/IceConverter.cpp
index b89666b..3871289 100644
--- a/src/IceConverter.cpp
+++ b/src/IceConverter.cpp
@@ -96,32 +96,32 @@
 
   void convertFunction(const Function *F) {
     Func = Ice::Cfg::create(Ctx, Converter.getNextSequenceNumber());
-    Ice::Cfg::setCurrentCfg(Func.get());
+    {
+      Ice::CfgLocalAllocatorScope _(Func.get());
 
-    VarMap.clear();
-    NodeMap.clear();
-    Func->setFunctionName(F->getName());
-    Func->setReturnType(convertToIceType(F->getReturnType()));
-    Func->setInternal(F->hasInternalLinkage());
-    Ice::TimerMarker T(Ice::TimerStack::TT_llvmConvert, Func.get());
+      VarMap.clear();
+      NodeMap.clear();
+      Func->setFunctionName(F->getName());
+      Func->setReturnType(convertToIceType(F->getReturnType()));
+      Func->setInternal(F->hasInternalLinkage());
+      Ice::TimerMarker T(Ice::TimerStack::TT_llvmConvert, Func.get());
 
-    // The initial definition/use of each arg is the entry node.
-    for (auto ArgI = F->arg_begin(), ArgE = F->arg_end(); ArgI != ArgE;
-         ++ArgI) {
-      Func->addArg(mapValueToIceVar(ArgI));
+      // The initial definition/use of each arg is the entry node.
+      for (auto ArgI = F->arg_begin(), ArgE = F->arg_end(); ArgI != ArgE;
+           ++ArgI) {
+        Func->addArg(mapValueToIceVar(ArgI));
+      }
+
+      // Make an initial pass through the block list just to resolve the blocks
+      // in the original linearized order. Otherwise the ICE linearized order
+      // will be affected by branch targets in terminator instructions.
+      for (const BasicBlock &BBI : *F)
+        mapBasicBlockToNode(&BBI);
+      for (const BasicBlock &BBI : *F)
+        convertBasicBlock(&BBI);
+      Func->setEntryNode(mapBasicBlockToNode(&F->getEntryBlock()));
+      Func->computeInOutEdges();
     }
-
-    // Make an initial pass through the block list just to resolve the blocks
-    // in the original linearized order. Otherwise the ICE linearized order
-    // will be affected by branch targets in terminator instructions.
-    for (const BasicBlock &BBI : *F)
-      mapBasicBlockToNode(&BBI);
-    for (const BasicBlock &BBI : *F)
-      convertBasicBlock(&BBI);
-    Func->setEntryNode(mapBasicBlockToNode(&F->getEntryBlock()));
-    Func->computeInOutEdges();
-
-    Ice::Cfg::setCurrentCfg(nullptr);
     Converter.translateFcn(std::move(Func));
   }
 
diff --git a/src/IceDefs.h b/src/IceDefs.h
index 1222424..c90b052 100644
--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -11,14 +11,13 @@
 /// \brief Declares various useful types and classes that have widespread use
 /// across Subzero.
 ///
-/// Every Subzero source file is expected to include IceDefs.h.
-///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEDEFS_H
 #define SUBZERO_SRC_ICEDEFS_H
 
 #include "IceBuildDefs.h" // TODO(stichnot): move into individual files
+#include "IceMemory.h"
 #include "IceTLS.h"
 
 #include "llvm/ADT/ArrayRef.h"
@@ -26,10 +25,8 @@
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
 #include "llvm/ADT/iterator_range.h"
-#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/raw_ostream.h"
@@ -46,6 +43,8 @@
 #include <string>
 #include <system_error>
 #include <unordered_map>
+#include <unordered_set>
+#include <utility>
 #include <vector>
 
 namespace Ice {
@@ -75,43 +74,6 @@
 class VariablesMetadata;
 
 constexpr char GlobalOffsetTable[] = "_GLOBAL_OFFSET_TABLE_";
-
-template <size_t SlabSize = 1024 * 1024>
-using ArenaAllocator =
-    llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator, SlabSize>;
-
-ArenaAllocator<> *getCurrentCfgAllocator();
-
-template <typename T> struct CfgLocalAllocator {
-  using value_type = T;
-  using pointer = T *;
-  using const_pointer = const T *;
-  using reference = T &;
-  using const_reference = const T &;
-  using size_type = std::size_t;
-  CfgLocalAllocator() = default;
-  template <class U> CfgLocalAllocator(const CfgLocalAllocator<U> &) {}
-  pointer allocate(size_type Num) {
-    return getCurrentCfgAllocator()->Allocate<T>(Num);
-  }
-  void deallocate(pointer, size_type) {}
-  template <class U> struct rebind { typedef CfgLocalAllocator<U> other; };
-  void construct(pointer P, const T &Val) {
-    new (static_cast<void *>(P)) T(Val);
-  }
-  void destroy(pointer P) { P->~T(); }
-};
-template <typename T, typename U>
-inline bool operator==(const CfgLocalAllocator<T> &,
-                       const CfgLocalAllocator<U> &) {
-  return true;
-}
-template <typename T, typename U>
-inline bool operator!=(const CfgLocalAllocator<T> &,
-                       const CfgLocalAllocator<U> &) {
-  return false;
-}
-
 // makeUnique should be used when memory is expected to be allocated from the
 // heap (as opposed to allocated from some Allocator.) It is intended to be
 // used instead of new.
@@ -160,15 +122,21 @@
 using AssignList = InstList;
 
 // Standard library containers with CfgLocalAllocator.
-template <typename T> using CfgVector = std::vector<T, CfgLocalAllocator<T>>;
 template <typename T> using CfgList = std::list<T, CfgLocalAllocator<T>>;
+template <typename T, typename H = std::hash<T>, typename Eq = std::equal_to<T>>
+using CfgUnorderedSet = std::unordered_set<T, H, Eq, CfgLocalAllocator<T>>;
+template <typename T, typename U, typename H = std::hash<T>,
+          typename Eq = std::equal_to<T>>
+using CfgUnorderedMap =
+    std::unordered_map<T, U, H, Eq, CfgLocalAllocator<std::pair<const T, U>>>;
+template <typename T> using CfgVector = std::vector<T, CfgLocalAllocator<T>>;
 
 // Containers that are arena-allocated from the Cfg's allocator.
 using OperandList = CfgVector<Operand *>;
 using VarList = CfgVector<Variable *>;
 using NodeList = CfgVector<CfgNode *>;
 
-// Contains that use the default (global) allocator.
+// Containers that use the default (global) allocator.
 using ConstantList = std::vector<Constant *>;
 using FunctionDeclarationList = std::vector<FunctionDeclaration *>;
 using VariableDeclarationList = std::vector<VariableDeclaration *>;
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 88519d1..79177c2 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -48,14 +48,13 @@
       return hash<Ice::IceString>()(Key.EmitString);
     }
 
-    assert(!Key.OffsetExpr.empty());
-    if (Key.OffsetExpr[0]->hasOffset()) {
-      return hash<Ice::IceString>()(Key.Name) +
-             hash<Ice::RelocOffsetT>()(Key.OffsetExpr[0]->getOffset());
-    }
-
+    // If there's no emit string, then we use the relocatable's name, plus the
+    // hash of a combination of the number of OffsetExprs and the known, fixed
+    // offset for the reloc. We left shift the known relocatable by 5 trying to
+    // minimize the interaction between the bits in OffsetExpr.size() and
+    // Key.Offset.
     return hash<Ice::IceString>()(Key.Name) +
-           hash<std::size_t>()(Key.OffsetExpr.size());
+           hash<std::size_t>()(Key.OffsetExpr.size() + (Key.Offset << 5));
   }
 };
 } // end of namespace std
@@ -293,7 +292,7 @@
 void GlobalContext::translateFunctions() {
   while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) {
     // Install Func in TLS for Cfg-specific container allocators.
-    Cfg::setCurrentCfg(Func.get());
+    CfgLocalAllocatorScope _(Func.get());
     // Reset per-function stats being accumulated in TLS.
     resetStats();
     // Set verbose level to none if the current function does NOT
@@ -309,7 +308,6 @@
         !matchSymbolName(Func->getFunctionName(),
                          getFlags().getTranslateOnly())) {
       Func->dump();
-      Cfg::setCurrentCfg(nullptr);
       continue; // Func goes out of scope and gets deleted
     }
 
@@ -348,7 +346,6 @@
         break;
       }
     }
-    Cfg::setCurrentCfg(nullptr);
     assert(Item);
     emitQueueBlockingPush(Item);
     // The Cfg now gets deleted as Func goes out of scope.
@@ -557,9 +554,8 @@
         // Unfortunately, we have to temporarily install the Cfg in TLS
         // because Variable::asType() uses the allocator to create the
         // differently-typed copy.
-        Cfg::setCurrentCfg(Func.get());
+        CfgLocalAllocatorScope _(Func.get());
         Func->emit();
-        Cfg::setCurrentCfg(nullptr);
         dumpStats(Func->getFunctionNameAndSize());
       } break;
       }
@@ -795,28 +791,28 @@
   return getConstPool()->Doubles.getOrAdd(this, ConstantDouble);
 }
 
-Constant *GlobalContext::getConstantSym(const RelocOffsetArray &Offset,
+Constant *GlobalContext::getConstantSym(const RelocOffsetT Offset,
+                                        const RelocOffsetArray &OffsetExpr,
                                         const IceString &Name,
                                         const IceString &EmitString,
                                         bool SuppressMangling) {
   return getConstPool()->Relocatables.getOrAdd(
-      this, RelocatableTuple(Offset, Name, EmitString, SuppressMangling));
+      this,
+      RelocatableTuple(Offset, OffsetExpr, Name, EmitString, SuppressMangling));
 }
 
 Constant *GlobalContext::getConstantSym(RelocOffsetT Offset,
                                         const IceString &Name,
                                         bool SuppressMangling) {
   constexpr char EmptyEmitString[] = "";
-  return getConstantSym({RelocOffset::create(this, Offset)}, Name,
-                        EmptyEmitString, SuppressMangling);
+  return getConstantSym(Offset, {}, Name, EmptyEmitString, SuppressMangling);
 }
 
 Constant *GlobalContext::getConstantExternSym(const IceString &Name) {
   constexpr RelocOffsetT Offset = 0;
   constexpr bool SuppressMangling = true;
   return getConstPool()->ExternRelocatables.getOrAdd(
-      this, RelocatableTuple({RelocOffset::create(this, Offset)}, Name,
-                             SuppressMangling));
+      this, RelocatableTuple(Offset, {}, Name, SuppressMangling));
 }
 
 Constant *GlobalContext::getConstantUndef(Type Ty) {
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index fe71fa7..4fd9592 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -200,7 +200,8 @@
   Constant *getConstantFloat(float Value);
   Constant *getConstantDouble(double Value);
   /// Returns a symbolic constant.
-  Constant *getConstantSym(const RelocOffsetArray &Offset,
+  Constant *getConstantSym(const RelocOffsetT Offset,
+                           const RelocOffsetArray &OffsetExpr,
                            const IceString &Name, const IceString &EmitString,
                            bool SuppressMangling);
   Constant *getConstantSym(RelocOffsetT Offset, const IceString &Name,
@@ -444,7 +445,7 @@
   ICE_CACHELINE_BOUNDARY;
   // Managed by getAllocator()
   GlobalLockType AllocLock;
-  ArenaAllocator<> Allocator;
+  ArenaAllocator Allocator;
 
   ICE_CACHELINE_BOUNDARY;
   // Managed by getDestructors()
@@ -506,8 +507,8 @@
   // TODO(jpp): move to EmitterContext.
   VariableDeclaration *ProfileBlockInfoVarDecl;
 
-  LockedPtr<ArenaAllocator<>> getAllocator() {
-    return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock);
+  LockedPtr<ArenaAllocator> getAllocator() {
+    return LockedPtr<ArenaAllocator>(&Allocator, &AllocLock);
   }
   LockedPtr<ConstantPool> getConstPool() {
     return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock);
diff --git a/src/IceMemory.cpp b/src/IceMemory.cpp
new file mode 100644
index 0000000..936a2c8
--- /dev/null
+++ b/src/IceMemory.cpp
@@ -0,0 +1,36 @@
+//===- subzero/src/IceMemory.cpp - Memory management definitions -*- C++ -*-==//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Implements memory management related routines for subzero.
+///
+//===----------------------------------------------------------------------===//
+
+#include "IceMemory.h"
+
+#include "IceCfg.h"
+#include "IceTLS.h"
+
+#include <cassert>
+#include <utility>
+
+namespace Ice {
+ICE_TLS_DEFINE_FIELD(ArenaAllocator *, CfgAllocatorTraits, CfgAllocator);
+
+CfgAllocatorTraits::allocator_type CfgAllocatorTraits::current() {
+  return ICE_TLS_GET_FIELD(CfgAllocator);
+}
+
+void CfgAllocatorTraits::set_current(const manager_type *Manager) {
+  ArenaAllocator *Allocator =
+      Manager == nullptr ? nullptr : Manager->Allocator.get();
+  ICE_TLS_SET_FIELD(CfgAllocator, Allocator);
+}
+
+} // end of namespace Ice
diff --git a/src/IceMemory.h b/src/IceMemory.h
new file mode 100644
index 0000000..e52af13
--- /dev/null
+++ b/src/IceMemory.h
@@ -0,0 +1,142 @@
+//===- subzero/src/IceMemory.h - Memory management declarations -*- C++ -*-===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Declares some useful data structures and routines dealing with
+/// memory management in Subzero (mostly, allocator types.)
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICEMEMORY_H
+#define SUBZERO_SRC_ICEMEMORY_H
+
+#include "IceTLS.h"
+
+#include "llvm/Support/Allocator.h"
+
+#include <cstddef>
+#include <mutex>
+
+namespace Ice {
+
+class Cfg;
+class GlobalContext;
+
+using ArenaAllocator =
+    llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator, /*SlabSize=*/1024 * 1024>;
+
+class LockedArenaAllocator {
+  LockedArenaAllocator() = delete;
+  LockedArenaAllocator(const LockedArenaAllocator &) = delete;
+  LockedArenaAllocator &operator=(const LockedArenaAllocator &) = delete;
+
+public:
+  LockedArenaAllocator(ArenaAllocator *Alloc, std::mutex *Mutex)
+      : Alloc(Alloc), AutoLock(*Mutex) {}
+  LockedArenaAllocator(LockedArenaAllocator &&) = default;
+  LockedArenaAllocator &operator=(LockedArenaAllocator &&) = default;
+  ~LockedArenaAllocator() = default;
+
+  ArenaAllocator *operator->() { return Alloc; }
+
+private:
+  ArenaAllocator *Alloc;
+  std::unique_lock<std::mutex> AutoLock;
+};
+
+template <typename T, typename Traits> struct sz_allocator {
+  /// std::allocator interface implementation.
+  /// @{
+  using value_type = T;
+  using pointer = T *;
+  using const_pointer = const T *;
+  using reference = T &;
+  using const_reference = const T &;
+  using size_type = std::size_t;
+  using difference_type = std::ptrdiff_t;
+
+  sz_allocator() = default;
+  template <class U> sz_allocator(const sz_allocator<U, Traits> &) {}
+
+  pointer address(reference x) const {
+    return reinterpret_cast<pointer>(&reinterpret_cast<char &>(x));
+  }
+  const_pointer address(const_reference x) const {
+    return reinterpret_cast<const_pointer>(&reinterpret_cast<const char &>(x));
+  }
+
+  pointer allocate(size_type num) {
+    return current()->template Allocate<T>(num);
+  }
+
+  template <typename... A> void construct(pointer P, A &&... Args) {
+    new (static_cast<void *>(P)) T(std::forward<A>(Args)...);
+  }
+
+  void deallocate(pointer, size_type) {}
+
+  template <class U> struct rebind { typedef sz_allocator<U, Traits> other; };
+
+  void destroy(pointer P) { P->~T(); }
+  /// @}
+
+  /// Manages the current underlying allocator.
+  /// @{
+  static typename Traits::allocator_type current() { return Traits::current(); }
+  static void init() { Traits::init(); }
+  /// @}
+};
+
+template <class Traits> struct sz_allocator_scope {
+  explicit sz_allocator_scope(typename Traits::manager_type *Manager) {
+    Traits::set_current(Manager);
+  }
+
+  ~sz_allocator_scope() { Traits::set_current(nullptr); }
+};
+
+template <typename T, typename U, typename Traits>
+inline bool operator==(const sz_allocator<T, Traits> &,
+                       const sz_allocator<U, Traits> &) {
+  return true;
+}
+
+template <typename T, typename U, typename Traits>
+inline bool operator!=(const sz_allocator<T, Traits> &,
+                       const sz_allocator<U, Traits> &) {
+  return false;
+}
+
+class CfgAllocatorTraits {
+  CfgAllocatorTraits() = delete;
+  CfgAllocatorTraits(const CfgAllocatorTraits &) = delete;
+  CfgAllocatorTraits &operator=(const CfgAllocatorTraits &) = delete;
+  ~CfgAllocatorTraits() = delete;
+
+public:
+  using allocator_type = ArenaAllocator *;
+  using manager_type = Cfg;
+
+  static void init() { ICE_TLS_INIT_FIELD(CfgAllocator); };
+
+  static allocator_type current();
+  static void set_current(const manager_type *Manager);
+
+private:
+  ICE_TLS_DECLARE_FIELD(ArenaAllocator *, CfgAllocator);
+};
+
+template <typename T>
+using CfgLocalAllocator = sz_allocator<T, CfgAllocatorTraits>;
+
+using CfgLocalAllocatorScope = sz_allocator_scope<CfgAllocatorTraits>;
+
+} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICEMEMORY_H
diff --git a/src/IceOperand.cpp b/src/IceOperand.cpp
index 73b0958..27dc6a3 100644
--- a/src/IceOperand.cpp
+++ b/src/IceOperand.cpp
@@ -19,6 +19,7 @@
 #include "IceCfgNode.h"
 #include "IceInst.h"
 #include "IceInstVarIter.h"
+#include "IceMemory.h"
 #include "IceTargetLowering.h" // dumping stack/frame pointer register
 
 namespace Ice {
@@ -40,8 +41,8 @@
   }
 
   bool BothHaveKnownOffsets = true;
-  RelocOffsetT OffsetA = 0;
-  RelocOffsetT OffsetB = 0;
+  RelocOffsetT OffsetA = A.Offset;
+  RelocOffsetT OffsetB = B.Offset;
   for (SizeT i = 0; i < A.OffsetExpr.size() && BothHaveKnownOffsets; ++i) {
     BothHaveKnownOffsets = A.OffsetExpr[i]->hasOffset();
     if (BothHaveKnownOffsets) {
@@ -197,7 +198,8 @@
   // Variable.
   if (!BuildDefs::dump() || getType() == Ty)
     return this;
-  Variable *V = new (getCurrentCfgAllocator()->Allocate<Variable>())
+  static constexpr SizeT One = 1;
+  Variable *V = new (CfgLocalAllocator<Variable>().allocate(One))
       Variable(kVariable, Ty, Number);
   V->NameIndex = NameIndex;
   V->RegNum = NewRegNum.hasValue() ? NewRegNum : RegNum;
diff --git a/src/IceOperand.h b/src/IceOperand.h
index 9b46e35..443e2d0 100644
--- a/src/IceOperand.h
+++ b/src/IceOperand.h
@@ -19,8 +19,8 @@
 #ifndef SUBZERO_SRC_ICEOPERAND_H
 #define SUBZERO_SRC_ICEOPERAND_H
 
-#include "IceCfg.h"
 #include "IceDefs.h"
+#include "IceCfg.h"
 #include "IceGlobalContext.h"
 #include "IceTypes.h"
 
@@ -301,18 +301,21 @@
   RelocatableTuple &operator=(const RelocatableTuple &) = delete;
 
 public:
-  RelocatableTuple(const RelocOffsetArray &OffsetExpr, const IceString &Name,
+  RelocatableTuple(const RelocOffsetT Offset,
+                   const RelocOffsetArray &OffsetExpr, const IceString &Name,
                    bool SuppressMangling)
-      : OffsetExpr(OffsetExpr), Name(Name), SuppressMangling(SuppressMangling) {
-  }
-
-  RelocatableTuple(const RelocOffsetArray &OffsetExpr, const IceString &Name,
-                   const IceString &EmitString, bool SuppressMangling)
-      : OffsetExpr(OffsetExpr), Name(Name), EmitString(EmitString),
+      : Offset(Offset), OffsetExpr(OffsetExpr), Name(Name),
         SuppressMangling(SuppressMangling) {}
 
+  RelocatableTuple(const RelocOffsetT Offset,
+                   const RelocOffsetArray &OffsetExpr, const IceString &Name,
+                   const IceString &EmitString, bool SuppressMangling)
+      : Offset(Offset), OffsetExpr(OffsetExpr), Name(Name),
+        EmitString(EmitString), SuppressMangling(SuppressMangling) {}
+
   RelocatableTuple(const RelocatableTuple &) = default;
 
+  const RelocOffsetT Offset;
   const RelocOffsetArray OffsetExpr;
   const IceString Name;
   const IceString EmitString;
@@ -332,16 +335,16 @@
   static ConstantRelocatable *create(GlobalContext *Ctx, Type Ty,
                                      const RelocatableTuple &Tuple) {
     return new (Ctx->allocate<ConstantRelocatable>())
-        ConstantRelocatable(Ty, Tuple.OffsetExpr, Tuple.Name, Tuple.EmitString,
-                            Tuple.SuppressMangling);
+        ConstantRelocatable(Ty, Tuple.Offset, Tuple.OffsetExpr, Tuple.Name,
+                            Tuple.EmitString, Tuple.SuppressMangling);
   }
 
   RelocOffsetT getOffset() const {
-    RelocOffsetT Offset = 0;
+    RelocOffsetT Ret = Offset;
     for (const auto *const OffsetReloc : OffsetExpr) {
-      Offset += OffsetReloc->getOffset();
+      Ret += OffsetReloc->getOffset();
     }
-    return Offset;
+    return Ret;
   }
 
   const IceString &getEmitString() const { return EmitString; }
@@ -361,13 +364,15 @@
   }
 
 private:
-  ConstantRelocatable(Type Ty, const RelocOffsetArray &OffsetExpr,
-                      const IceString &Name, const IceString &EmitString,
-                      bool SuppressMangling)
-      : Constant(kConstRelocatable, Ty), OffsetExpr(OffsetExpr), Name(Name),
-        EmitString(EmitString), SuppressMangling(SuppressMangling) {}
+  ConstantRelocatable(Type Ty, const RelocOffsetT Offset,
+                      const RelocOffsetArray &OffsetExpr, const IceString &Name,
+                      const IceString &EmitString, bool SuppressMangling)
+      : Constant(kConstRelocatable, Ty), Offset(Offset), OffsetExpr(OffsetExpr),
+        Name(Name), EmitString(EmitString), SuppressMangling(SuppressMangling) {
+  }
 
-  const RelocOffsetArray OffsetExpr; /// fixed offset to add
+  const RelocOffsetT Offset;         /// fixed, known offset to add
+  const RelocOffsetArray OffsetExpr; /// fixed, unknown offset to add
   const IceString Name;              /// optional for debug/dump
   const IceString EmitString;        /// optional for textual emission
   const bool SuppressMangling;
@@ -463,7 +468,7 @@
   bool operator>=(const RegNumT &) = delete;
 };
 
-/// RegNumBVIter wraps llvm::SmallBitVector so that instead of this pattern:
+/// RegNumBVIter wraps SmallBitVector so that instead of this pattern:
 ///
 ///   for (int i = V.find_first(); i != -1; i = V.find_next(i)) {
 ///     RegNumT RegNum = RegNumT::fromInt(i);
@@ -475,12 +480,10 @@
 ///   for (RegNumT RegNum : RegNumBVIter(V)) {
 ///     ...
 ///   }
-class RegNumBVIter {
-  using T = llvm::SmallBitVector;
+template <class B> class RegNumBVIterImpl {
+  using T = B;
   static constexpr int Sentinel = -1;
-  RegNumBVIter() = delete;
-  RegNumBVIter(const RegNumBVIter &) = delete;
-  RegNumBVIter &operator=(const RegNumBVIter &) = delete;
+  RegNumBVIterImpl() = delete;
 
 public:
   class Iterator {
@@ -507,7 +510,9 @@
     int Current;
   };
 
-  explicit RegNumBVIter(const T &V) : V(V) {}
+  RegNumBVIterImpl(const RegNumBVIterImpl &) = default;
+  RegNumBVIterImpl &operator=(const RegNumBVIterImpl &) = delete;
+  explicit RegNumBVIterImpl(const T &V) : V(V) {}
   Iterator begin() { return Iterator(V); }
   Iterator end() { return Iterator(V, Sentinel); }
 
@@ -515,6 +520,10 @@
   const T &V;
 };
 
+template <class B> RegNumBVIterImpl<B> RegNumBVIter(const B &BV) {
+  return RegNumBVIterImpl<B>(BV);
+}
+
 /// RegWeight is a wrapper for a uint32_t weight value, with a special value
 /// that represents infinite weight, and an addWeight() method that ensures that
 /// W+infinity=infinity.
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 11a4a1b..8ae852c 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -15,6 +15,7 @@
 
 #include "IceRegAlloc.h"
 
+#include "IceBitVector.h"
 #include "IceCfg.h"
 #include "IceCfgNode.h"
 #include "IceInst.h"
@@ -78,7 +79,7 @@
 }
 
 int32_t findMinWeightIndex(
-    const llvm::SmallBitVector &RegMask,
+    const SmallBitVector &RegMask,
     const llvm::SmallVector<RegWeight, LinearScan::REGS_SIZE> &Weights) {
   int MinWeightIndex = -1;
   for (RegNumT i : RegNumBVIter(RegMask)) {
@@ -413,7 +414,7 @@
       FOREACH_VAR_IN_INST(Var, *I) {
         if (!Var->hasRegTmp())
           continue;
-        const llvm::SmallBitVector &Aliases = *RegAliases[Var->getRegNumTmp()];
+        const auto &Aliases = *RegAliases[Var->getRegNumTmp()];
         for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
           Iter.RegMask[RegAlias] = false;
         }
@@ -458,7 +459,7 @@
     if (Moved) {
       // Decrement Item from RegUses[].
       assert(Item->hasRegTmp());
-      const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()];
+      const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
       for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
         --RegUses[RegAlias];
         assert(RegUses[RegAlias] >= 0);
@@ -482,7 +483,7 @@
       moveItem(Inactive, Index, Active);
       // Increment Item in RegUses[].
       assert(Item->hasRegTmp());
-      const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()];
+      const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
       for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
         assert(RegUses[RegAlias] >= 0);
         ++RegUses[RegAlias];
@@ -524,7 +525,7 @@
 
     // That register must be one in the RegMask set, e.g. don't try to prefer
     // the stack pointer as a result of the stacksave intrinsic.
-    const llvm::SmallBitVector &Aliases = *RegAliases[SrcVar->getRegNumTmp()];
+    const auto &Aliases = *RegAliases[SrcVar->getRegNumTmp()];
     const int SrcReg = (Iter.RegMask & Aliases).find_first();
     if (SrcReg == -1)
       continue;
@@ -561,7 +562,7 @@
   for (const Variable *Item : Inactive) {
     if (!Item->rangeOverlaps(Iter.Cur))
       continue;
-    const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()];
+    const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
     for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
       // Don't assert(Iter.Free[RegAlias]) because in theory (though probably
       // never in practice) there could be two inactive variables that were
@@ -593,7 +594,7 @@
       break;
     if (!Item->rangeOverlaps(Iter.Cur))
       continue;
-    const llvm::SmallBitVector &Aliases =
+    const auto &Aliases =
         *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp()
     for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
       Iter.Weights[RegAlias].setWeight(RegWeight::Inf);
@@ -616,7 +617,7 @@
   assert(Cur->getRegNumTmp() == RegNum);
   dumpLiveRangeTrace("Precoloring  ", Cur);
   Active.push_back(Cur);
-  const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
+  const auto &Aliases = *RegAliases[RegNum];
   for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
     assert(RegUses[RegAlias] >= 0);
     ++RegUses[RegAlias];
@@ -629,7 +630,7 @@
 void LinearScan::allocatePreferredRegister(IterationState &Iter) {
   Iter.Cur->setRegNumTmp(Iter.PreferReg);
   dumpLiveRangeTrace("Preferring   ", Iter.Cur);
-  const llvm::SmallBitVector &Aliases = *RegAliases[Iter.PreferReg];
+  const auto &Aliases = *RegAliases[Iter.PreferReg];
   for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
     assert(RegUses[RegAlias] >= 0);
     ++RegUses[RegAlias];
@@ -645,7 +646,7 @@
     dumpLiveRangeTrace("Allocating Y ", Iter.Cur);
   else
     dumpLiveRangeTrace("Allocating X ", Iter.Cur);
-  const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
+  const auto &Aliases = *RegAliases[RegNum];
   for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
     assert(RegUses[RegAlias] >= 0);
     ++RegUses[RegAlias];
@@ -658,7 +659,7 @@
   for (const Variable *Item : Active) {
     assert(Item->rangeOverlaps(Iter.Cur));
     assert(Item->hasRegTmp());
-    const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()];
+    const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
     // We add the Item's weight to each alias/subregister to represent that,
     // should we decide to pick any of them, then we would incur that many
     // memory accesses.
@@ -672,7 +673,7 @@
     if (!Item->rangeOverlaps(Iter.Cur))
       continue;
     assert(Item->hasRegTmp());
-    const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()];
+    const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
     RegWeight W = Item->getWeight(Func);
     for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
       Iter.Weights[RegAlias].addWeight(W);
@@ -726,14 +727,14 @@
 
   // Evict all live ranges in Active that register number MinWeightIndex is
   // assigned to.
-  const llvm::SmallBitVector &Aliases = *RegAliases[MinWeightIndex];
+  const auto &Aliases = *RegAliases[MinWeightIndex];
   for (SizeT I = Active.size(); I > 0; --I) {
     const SizeT Index = I - 1;
     Variable *Item = Active[Index];
     const auto RegNum = Item->getRegNumTmp();
     if (Aliases[RegNum]) {
       dumpLiveRangeTrace("Evicting A   ", Item);
-      const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
+      const auto &Aliases = *RegAliases[RegNum];
       for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
         --RegUses[RegAlias];
         assert(RegUses[RegAlias] >= 0);
@@ -771,9 +772,9 @@
   dumpLiveRangeTrace("Allocating Z ", Iter.Cur);
 }
 
-void LinearScan::assignFinalRegisters(
-    const llvm::SmallBitVector &RegMaskFull,
-    const llvm::SmallBitVector &PreDefinedRegisters, bool Randomized) {
+void LinearScan::assignFinalRegisters(const SmallBitVector &RegMaskFull,
+                                      const SmallBitVector &PreDefinedRegisters,
+                                      bool Randomized) {
   const size_t NumRegisters = RegMaskFull.size();
   llvm::SmallVector<RegNumT, REGS_SIZE> Permutation(NumRegisters);
   if (Randomized) {
@@ -825,15 +826,14 @@
 //
 // Requires running Cfg::liveness(Liveness_Intervals) in preparation. Results
 // are assigned to Variable::RegNum for each Variable.
-void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
-                      bool Randomized) {
+void LinearScan::scan(const SmallBitVector &RegMaskFull, bool Randomized) {
   TimerMarker T(TimerStack::TT_linearScan, Func);
   assert(RegMaskFull.any()); // Sanity check
   if (Verbose)
     Ctx->lockStr();
   Func->resetCurrentNode();
   const size_t NumRegisters = RegMaskFull.size();
-  llvm::SmallBitVector PreDefinedRegisters(NumRegisters);
+  SmallBitVector PreDefinedRegisters(NumRegisters);
   if (Randomized) {
     for (Variable *Var : UnhandledPrecolored) {
       PreDefinedRegisters[Var->getRegNum()] = true;
@@ -855,7 +855,7 @@
   const TargetLowering::RegSetMask RegsInclude =
       TargetLowering::RegSet_CallerSave;
   const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None;
-  const llvm::SmallBitVector KillsMask =
+  const SmallBitVector KillsMask =
       Target->getRegisterSet(RegsInclude, RegsExclude);
 
   // Allocate memory once outside the loop.
@@ -901,7 +901,7 @@
     // Disable AllowOverlap if an Active variable, which is not Prefer, shares
     // Prefer's register, and has a definition within Cur's live range.
     if (Iter.AllowOverlap) {
-      const llvm::SmallBitVector &Aliases = *RegAliases[Iter.PreferReg];
+      const auto &Aliases = *RegAliases[Iter.PreferReg];
       for (const Variable *Item : Active) {
         const RegNumT RegNum = Item->getRegNumTmp();
         if (Item != Iter.Prefer && Aliases[RegNum] &&
diff --git a/src/IceRegAlloc.h b/src/IceRegAlloc.h
index 67f4137..da0642b 100644
--- a/src/IceRegAlloc.h
+++ b/src/IceRegAlloc.h
@@ -19,6 +19,7 @@
 #define SUBZERO_SRC_ICEREGALLOC_H
 
 #include "IceDefs.h"
+#include "IceBitVector.h"
 #include "IceOperand.h"
 #include "IceTypes.h"
 
@@ -32,7 +33,7 @@
 public:
   explicit LinearScan(Cfg *Func);
   void init(RegAllocKind Kind);
-  void scan(const llvm::SmallBitVector &RegMask, bool Randomized);
+  void scan(const SmallBitVector &RegMask, bool Randomized);
   // Returns the number of times some variable has been assigned a register but
   // later evicted because of a higher-priority allocation.  The idea is that we
   // can implement "second-chance bin-packing" by rerunning register allocation
@@ -60,11 +61,11 @@
     Variable *Prefer = nullptr;
     RegNumT PreferReg;
     bool AllowOverlap = false;
-    llvm::SmallBitVector RegMask;
-    llvm::SmallBitVector RegMaskUnfiltered;
-    llvm::SmallBitVector Free;
-    llvm::SmallBitVector FreeUnfiltered;
-    llvm::SmallBitVector PrecoloredUnhandledMask; // Note: only used for dumping
+    SmallBitVector RegMask;
+    SmallBitVector RegMaskUnfiltered;
+    SmallBitVector Free;
+    SmallBitVector FreeUnfiltered;
+    SmallBitVector PrecoloredUnhandledMask; // Note: only used for dumping
     llvm::SmallVector<RegWeight, REGS_SIZE> Weights;
   };
 
@@ -102,8 +103,8 @@
   void allocatePreferredRegister(IterationState &Iter);
   void allocateFreeRegister(IterationState &Iter, bool Filtered);
   void handleNoFreeRegisters(IterationState &Iter);
-  void assignFinalRegisters(const llvm::SmallBitVector &RegMaskFull,
-                            const llvm::SmallBitVector &PreDefinedRegisters,
+  void assignFinalRegisters(const SmallBitVector &RegMaskFull,
+                            const SmallBitVector &PreDefinedRegisters,
                             bool Randomized);
   /// @}
 
@@ -125,9 +126,7 @@
   /// currently assigned to. It can be greater than 1 as a result of
   /// AllowOverlap inference.
   llvm::SmallVector<int32_t, REGS_SIZE> RegUses;
-  // TODO(jpp): for some architectures a SmallBitVector might not be big
-  // enough. Evaluate what the performance impact on those architectures is.
-  llvm::SmallVector<const llvm::SmallBitVector *, REGS_SIZE> RegAliases;
+  llvm::SmallVector<const SmallBitVector *, REGS_SIZE> RegAliases;
   bool FindPreference = false;
   bool FindOverlap = false;
 
diff --git a/src/IceTLS.h b/src/IceTLS.h
index cd12da7..14865a9 100644
--- a/src/IceTLS.h
+++ b/src/IceTLS.h
@@ -66,6 +66,8 @@
 // called. The F__key field is used as the argument to pthread_getspecific()
 // and pthread_setspecific().
 
+#include "llvm/Support/ErrorHandling.h"
+
 #include <pthread.h>
 
 #define ICE_TLS_DECLARE_FIELD(Type, FieldName)                                 \
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index f0cf542..c6e445e 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -118,7 +118,7 @@
 
 namespace {
 
-void printRegisterSet(Ostream &Str, const llvm::SmallBitVector &Bitset,
+void printRegisterSet(Ostream &Str, const SmallBitVector &Bitset,
                       std::function<IceString(RegNumT)> getRegName,
                       const IceString &LineIndentString) {
   constexpr size_t RegistersPerLine = 16;
@@ -162,14 +162,13 @@
 } // end of anonymous namespace
 
 void TargetLowering::filterTypeToRegisterSet(
-    GlobalContext *Ctx, int32_t NumRegs,
-    llvm::SmallBitVector TypeToRegisterSet[], size_t TypeToRegisterSetSize,
-    std::function<IceString(RegNumT)> getRegName,
+    GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[],
+    size_t TypeToRegisterSetSize, std::function<IceString(RegNumT)> getRegName,
     std::function<IceString(RegClass)> getRegClassName) {
-  std::vector<llvm::SmallBitVector> UseSet(TypeToRegisterSetSize,
-                                           llvm::SmallBitVector(NumRegs));
-  std::vector<llvm::SmallBitVector> ExcludeSet(TypeToRegisterSetSize,
-                                               llvm::SmallBitVector(NumRegs));
+  std::vector<SmallBitVector> UseSet(TypeToRegisterSetSize,
+                                     SmallBitVector(NumRegs));
+  std::vector<SmallBitVector> ExcludeSet(TypeToRegisterSetSize,
+                                         SmallBitVector(NumRegs));
 
   std::unordered_map<IceString, RegNumT> RegNameToIndex;
   for (int32_t RegIndex = 0; RegIndex < NumRegs; ++RegIndex) {
@@ -185,7 +184,7 @@
   // bit is set in RegSet[][].  If "<class>:" is missing, then the bit is set
   // for all classes.
   auto processRegList = [&](const ClFlags::StringVector &RegNames,
-                            std::vector<llvm::SmallBitVector> &RegSet) {
+                            std::vector<SmallBitVector> &RegSet) {
     for (const IceString &RegClassAndName : RegNames) {
       IceString RClass;
       IceString RName;
@@ -219,9 +218,9 @@
 
   // Apply filters.
   for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
-    llvm::SmallBitVector *TypeBitSet = &TypeToRegisterSet[TypeIndex];
-    llvm::SmallBitVector *UseBitSet = &UseSet[TypeIndex];
-    llvm::SmallBitVector *ExcludeBitSet = &ExcludeSet[TypeIndex];
+    SmallBitVector *TypeBitSet = &TypeToRegisterSet[TypeIndex];
+    SmallBitVector *UseBitSet = &UseSet[TypeIndex];
+    SmallBitVector *ExcludeBitSet = &ExcludeSet[TypeIndex];
     if (UseBitSet->any())
       *TypeBitSet = *UseBitSet;
     (*TypeBitSet).reset(*ExcludeBitSet);
@@ -470,7 +469,7 @@
   RegInclude |= RegSet_CalleeSave;
   if (hasFramePointer())
     RegExclude |= RegSet_FramePointer;
-  llvm::SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
+  SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
   bool Repeat = (Kind == RAK_Global && Ctx->getFlags().shouldRepeatRegAlloc());
   do {
     LinearScan.init(Kind);
@@ -539,7 +538,7 @@
 }
 
 void TargetLowering::getVarStackSlotParams(
-    VarList &SortedSpilledVariables, llvm::SmallBitVector &RegsUsed,
+    VarList &SortedSpilledVariables, SmallBitVector &RegsUsed,
     size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
     uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
     std::function<bool(Variable *)> TargetVarHook) {
@@ -570,7 +569,7 @@
   // returns a second time.
   const bool SimpleCoalescing = !callsReturnsTwice();
 
-  std::vector<size_t> LocalsSize(Func->getNumNodes());
+  CfgVector<size_t> LocalsSize(Func->getNumNodes());
   const VarList &Variables = Func->getVariables();
   VarList SpilledVariables;
   for (Variable *Var : Variables) {
@@ -668,7 +667,7 @@
     SpillAreaPaddingBytes += TestPadding;
   size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
   size_t NextStackOffset = SpillAreaPaddingBytes;
-  std::vector<size_t> LocalsSize(Func->getNumNodes());
+  CfgVector<size_t> LocalsSize(Func->getNumNodes());
   const bool SimpleCoalescing = !callsReturnsTwice();
 
   for (Variable *Var : SortedSpilledVariables) {
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 36e945f..86721a2 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -23,8 +23,9 @@
 #ifndef SUBZERO_SRC_ICETARGETLOWERING_H
 #define SUBZERO_SRC_ICETARGETLOWERING_H
 
-#include "IceCfgNode.h"
 #include "IceDefs.h"
+#include "IceBitVector.h"
+#include "IceCfgNode.h"
 #include "IceInst.h" // for the names of the Inst subtypes
 #include "IceOperand.h"
 #include "IceTypes.h"
@@ -273,24 +274,24 @@
   };
   using RegSetMask = uint32_t;
 
-  virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include,
-                                              RegSetMask Exclude) const = 0;
+  virtual SmallBitVector getRegisterSet(RegSetMask Include,
+                                        RegSetMask Exclude) const = 0;
   /// Get the set of physical registers available for the specified Variable's
   /// register class, applying register restrictions from the command line.
-  virtual const llvm::SmallBitVector &
+  virtual const SmallBitVector &
   getRegistersForVariable(const Variable *Var) const = 0;
   /// Get the set of *all* physical registers available for the specified
   /// Variable's register class, *not* applying register restrictions from the
   /// command line.
-  virtual const llvm::SmallBitVector &
+  virtual const SmallBitVector &
   getAllRegistersForVariable(const Variable *Var) const = 0;
-  virtual const llvm::SmallBitVector &getAliasesForRegister(RegNumT) const = 0;
+  virtual const SmallBitVector &getAliasesForRegister(RegNumT) const = 0;
 
   void regAlloc(RegAllocKind Kind);
 
   virtual void
   makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
-                                const llvm::SmallBitVector &ExcludeRegisters,
+                                const SmallBitVector &ExcludeRegisters,
                                 uint64_t Salt) const = 0;
 
   /// Get the minimum number of clusters required for a jump table to be
@@ -365,7 +366,7 @@
   // Applies command line filters to TypeToRegisterSet array.
   static void
   filterTypeToRegisterSet(GlobalContext *Ctx, int32_t NumRegs,
-                          llvm::SmallBitVector TypeToRegisterSet[],
+                          SmallBitVector TypeToRegisterSet[],
                           size_t TypeToRegisterSetSize,
                           std::function<IceString(RegNumT)> getRegName,
                           std::function<IceString(RegClass)> getRegClassName);
@@ -429,8 +430,8 @@
   /// TargetVarHook. If the TargetVarHook returns true, then the variable is
   /// skipped and not considered with the rest of the spilled variables.
   void getVarStackSlotParams(VarList &SortedSpilledVariables,
-                             llvm::SmallBitVector &RegsUsed,
-                             size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
+                             SmallBitVector &RegsUsed, size_t *GlobalsSize,
+                             size_t *SpillAreaSizeBytes,
                              uint32_t *SpillAreaAlignmentBytes,
                              uint32_t *LocalsSlotsAlignmentBytes,
                              std::function<bool(Variable *)> TargetVarHook);
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 9db8a77..e5abe21 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -301,13 +301,13 @@
 void TargetARM32::staticInit(GlobalContext *Ctx) {
   RegNumT::setLimit(RegARM32::Reg_NUM);
   // Limit this size (or do all bitsets need to be the same width)???
-  llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
-  llvm::SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
-  llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
-  llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM);
-  llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
-  llvm::SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
-  llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
+  SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
+  SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
+  SmallBitVector Float32Registers(RegARM32::Reg_NUM);
+  SmallBitVector Float64Registers(RegARM32::Reg_NUM);
+  SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
+  SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
+  SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
   const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
   for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
     const auto &Entry = RegARM32::RegTable[i];
@@ -924,9 +924,6 @@
   AddPcLabel->setRelocOffset(AddPcReloc);
 
   const IceString EmitText = Name;
-  // We need a -8 in the relocation expression to account for the pc's value
-  // read by the first instruction emitted in Finish(PC).
-  auto *Imm8 = RelocOffset::create(Ctx, -8);
 
   auto *MovwReloc = RelocOffset::create(Ctx);
   auto *MovwLabel = InstARM32Label::create(Func, this);
@@ -944,9 +941,10 @@
   //   movt reg, #:upper16:(Symbol - Label - Number)
   //
   // relocations.
-  auto *CRLower = Ctx->getConstantSym({MovwReloc, AddPcReloc, Imm8}, Name,
+  static constexpr RelocOffsetT PcOffset = -8;
+  auto *CRLower = Ctx->getConstantSym(PcOffset, {MovwReloc, AddPcReloc}, Name,
                                       EmitText + " -16", SuppressMangling);
-  auto *CRUpper = Ctx->getConstantSym({MovtReloc, AddPcReloc, Imm8}, Name,
+  auto *CRUpper = Ctx->getConstantSym(PcOffset, {MovtReloc, AddPcReloc}, Name,
                                       EmitText + " -12", SuppressMangling);
 
   Context.insert(MovwLabel);
@@ -1440,9 +1438,8 @@
   Context.init(Node);
   Context.setInsertPoint(Context.getCur());
 
-  llvm::SmallBitVector CalleeSaves =
-      getRegisterSet(RegSet_CalleeSave, RegSet_None);
-  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
+  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
+  RegsUsed = SmallBitVector(CalleeSaves.size());
   VarList SortedSpilledVariables;
   size_t GlobalsSize = 0;
   // If there is a separate locals area, this represents that area. Otherwise
@@ -1498,7 +1495,7 @@
   // used registers -- and their aliases. Then, we figure out which GPRs and
   // VFP S registers should be saved. We don't bother saving D/Q registers
   // because their uses are recorded as S regs uses.
-  llvm::SmallBitVector ToPreserve(RegARM32::Reg_NUM);
+  SmallBitVector ToPreserve(RegARM32::Reg_NUM);
   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
     if (NeedSandboxing && i == RegARM32::Reg_r9) {
       // r9 is never updated in sandboxed code.
@@ -2143,9 +2140,9 @@
   return nullptr;
 }
 
-llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
-                                                 RegSetMask Exclude) const {
-  llvm::SmallBitVector Registers(RegARM32::Reg_NUM);
+SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
+                                           RegSetMask Exclude) const {
+  SmallBitVector Registers(RegARM32::Reg_NUM);
 
   for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) {
     const auto &Entry = RegARM32::RegTable[i];
@@ -6053,7 +6050,7 @@
 
 void TargetARM32::makeRandomRegisterPermutation(
     llvm::SmallVectorImpl<RegNumT> &Permutation,
-    const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
+    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
   (void)Permutation;
   (void)ExcludeRegisters;
   (void)Salt;
@@ -6800,10 +6797,9 @@
   Str << ".eabi_attribute 14, 3   @ Tag_ABI_PCS_R9_use: Not used\n";
 }
 
-llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
-llvm::SmallBitVector
-    TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
-llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
+SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
+SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
+SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
 
 } // end of namespace ARM32
 } // end of namespace Ice
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 83e3c58..8c1d088 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -22,10 +22,6 @@
 #include "IceRegistersARM32.h"
 #include "IceTargetLowering.h"
 
-#include "llvm/ADT/SmallBitVector.h"
-
-#include <unordered_set>
-
 namespace Ice {
 namespace ARM32 {
 
@@ -84,9 +80,9 @@
   Variable *getPhysicalRegister(RegNumT RegNum,
                                 Type Ty = IceType_void) override;
   IceString getRegName(RegNumT RegNum, Type Ty) const override;
-  llvm::SmallBitVector getRegisterSet(RegSetMask Include,
-                                      RegSetMask Exclude) const override;
-  const llvm::SmallBitVector &
+  SmallBitVector getRegisterSet(RegSetMask Include,
+                                RegSetMask Exclude) const override;
+  const SmallBitVector &
   getRegistersForVariable(const Variable *Var) const override {
     RegClass RC = Var->getRegClass();
     switch (RC) {
@@ -97,14 +93,13 @@
       return TypeToRegisterSet[RC];
     }
   }
-  const llvm::SmallBitVector &
+  const SmallBitVector &
   getAllRegistersForVariable(const Variable *Var) const override {
     RegClass RC = Var->getRegClass();
     assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
     return TypeToRegisterSetUnfiltered[RC];
   }
-  const llvm::SmallBitVector &
-  getAliasesForRegister(RegNumT Reg) const override {
+  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
     return RegisterAliases[Reg];
   }
   bool hasFramePointer() const override { return UsesFramePointer; }
@@ -302,7 +297,7 @@
 
   void
   makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
-                                const llvm::SmallBitVector &ExcludeRegisters,
+                                const SmallBitVector &ExcludeRegisters,
                                 uint64_t Salt) const override;
 
   // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;
@@ -898,7 +893,7 @@
   // TODO(jpp): if the same global G is used in different functions, then this
   // method will emit one G(gotoff) relocation per function.
   IceString createGotoffRelocation(const ConstantRelocatable *CR);
-  std::unordered_set<IceString> KnownGotoffs;
+  CfgUnorderedSet<IceString> KnownGotoffs;
   /// @}
 
   /// Loads the constant relocatable Name to Register. Then invoke Finish to
@@ -1116,11 +1111,10 @@
   bool PrologEmitsFixedAllocas = false;
   uint32_t MaxOutArgsSizeBytes = 0;
   // TODO(jpp): std::array instead of array.
-  static llvm::SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
-  static llvm::SmallBitVector
-      TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
-  static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
-  llvm::SmallBitVector RegsUsed;
+  static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
+  static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
+  static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
+  SmallBitVector RegsUsed;
   VarList PhysicalRegisters[IceType_NUM];
   VarList PreservedGPRs;
   VarList PreservedSRegs;
@@ -1158,12 +1152,12 @@
 
   private:
     void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
-    llvm::SmallBitVector GPRegsUsed;
+    SmallBitVector GPRegsUsed;
     CfgVector<RegNumT> GPRArgs;
     CfgVector<RegNumT> I64Args;
 
     void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
-    llvm::SmallBitVector VFPRegsUsed;
+    SmallBitVector VFPRegsUsed;
     CfgVector<RegNumT> FP32Args;
     CfgVector<RegNumT> FP64Args;
     CfgVector<RegNumT> Vec128Args;
@@ -1177,9 +1171,9 @@
 
   void postambleCtpop64(const InstCall *Instr);
   void preambleDivRem(const InstCall *Instr);
-  std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
+  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
       ARM32HelpersPreamble;
-  std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
+  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
       ARM32HelpersPostamble;
 
   class ComputationTracker {
@@ -1236,7 +1230,7 @@
 
     // ComputationMap maps a Variable number to a payload identifying which
     // instruction defined it.
-    using ComputationMap = std::unordered_map<SizeT, ComputationEntry>;
+    using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
     ComputationMap KnownComputations;
   };
 
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 9010cbe..48b5b03 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -78,12 +78,12 @@
 void TargetMIPS32::staticInit(GlobalContext *Ctx) {
   (void)Ctx;
   RegNumT::setLimit(RegMIPS32::Reg_NUM);
-  llvm::SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
-  llvm::SmallBitVector I64PairRegisters(RegMIPS32::Reg_NUM);
-  llvm::SmallBitVector Float32Registers(RegMIPS32::Reg_NUM);
-  llvm::SmallBitVector Float64Registers(RegMIPS32::Reg_NUM);
-  llvm::SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
-  llvm::SmallBitVector InvalidRegisters(RegMIPS32::Reg_NUM);
+  SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
+  SmallBitVector I64PairRegisters(RegMIPS32::Reg_NUM);
+  SmallBitVector Float32Registers(RegMIPS32::Reg_NUM);
+  SmallBitVector Float64Registers(RegMIPS32::Reg_NUM);
+  SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
+  SmallBitVector InvalidRegisters(RegMIPS32::Reg_NUM);
 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
           isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
   IntegerRegisters[RegMIPS32::val] = isInt;                                    \
@@ -531,9 +531,9 @@
   return nullptr;
 }
 
-llvm::SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
-                                                  RegSetMask Exclude) const {
-  llvm::SmallBitVector Registers(RegMIPS32::Reg_NUM);
+SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
+                                            RegSetMask Exclude) const {
+  SmallBitVector Registers(RegMIPS32::Reg_NUM);
 
 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
           isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
@@ -1173,7 +1173,7 @@
 
 void TargetMIPS32::makeRandomRegisterPermutation(
     llvm::SmallVectorImpl<RegNumT> &Permutation,
-    const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
+    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
   (void)Permutation;
   (void)ExcludeRegisters;
   (void)Salt;
@@ -1316,9 +1316,9 @@
       << "nomips16\n";
 }
 
-llvm::SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
-llvm::SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
-llvm::SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
+SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
+SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
+SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
 
 } // end of namespace MIPS32
 } // end of namespace Ice
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index 625a3dd..5dffe61 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -50,22 +50,21 @@
   Variable *getPhysicalRegister(RegNumT RegNum,
                                 Type Ty = IceType_void) override;
   IceString getRegName(RegNumT RegNum, Type Ty) const override;
-  llvm::SmallBitVector getRegisterSet(RegSetMask Include,
-                                      RegSetMask Exclude) const override;
-  const llvm::SmallBitVector &
+  SmallBitVector getRegisterSet(RegSetMask Include,
+                                RegSetMask Exclude) const override;
+  const SmallBitVector &
   getRegistersForVariable(const Variable *Var) const override {
     RegClass RC = Var->getRegClass();
     assert(RC < RC_Target);
     return TypeToRegisterSet[RC];
   }
-  const llvm::SmallBitVector &
+  const SmallBitVector &
   getAllRegistersForVariable(const Variable *Var) const override {
     RegClass RC = Var->getRegClass();
     assert(RC < RC_Target);
     return TypeToRegisterSetUnfiltered[RC];
   }
-  const llvm::SmallBitVector &
-  getAliasesForRegister(RegNumT Reg) const override {
+  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
     return RegisterAliases[Reg];
   }
   bool hasFramePointer() const override { return UsesFramePointer; }
@@ -310,15 +309,15 @@
                          RandomNumberGenerator &RNG) override;
   void
   makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
-                                const llvm::SmallBitVector &ExcludeRegisters,
+                                const SmallBitVector &ExcludeRegisters,
                                 uint64_t Salt) const override;
 
   bool UsesFramePointer = false;
   bool NeedsStackAlignment = false;
-  static llvm::SmallBitVector TypeToRegisterSet[RCMIPS32_NUM];
-  static llvm::SmallBitVector TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
-  static llvm::SmallBitVector RegisterAliases[RegMIPS32::Reg_NUM];
-  llvm::SmallBitVector RegsUsed;
+  static SmallBitVector TypeToRegisterSet[RCMIPS32_NUM];
+  static SmallBitVector TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
+  static SmallBitVector RegisterAliases[RegMIPS32::Reg_NUM];
+  SmallBitVector RegsUsed;
   VarList PhysicalRegisters[IceType_NUM];
 
 private:
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 2a29fe4..be26073 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -111,15 +111,15 @@
 const char *TargetX8632Traits::TargetName = "X8632";
 
 template <>
-std::array<llvm::SmallBitVector, RCX86_NUM>
+std::array<SmallBitVector, RCX86_NUM>
     TargetX86Base<X8632::Traits>::TypeToRegisterSet = {{}};
 
 template <>
-std::array<llvm::SmallBitVector, RCX86_NUM>
+std::array<SmallBitVector, RCX86_NUM>
     TargetX86Base<X8632::Traits>::TypeToRegisterSetUnfiltered = {{}};
 
 template <>
-std::array<llvm::SmallBitVector,
+std::array<SmallBitVector,
            TargetX86Base<X8632::Traits>::Traits::RegisterSet::Reg_NUM>
     TargetX86Base<X8632::Traits>::RegisterAliases = {{}};
 
@@ -250,10 +250,10 @@
     auto *AfterAdd = InstX86Label::create(Func, this);
     AfterAdd->setRelocOffset(AfterAddReloc);
 
-    auto *ImmSize = RelocOffset::create(Ctx, -typeWidthInBytes(IceType_i32));
+    const RelocOffsetT ImmSize = -typeWidthInBytes(IceType_i32);
 
     auto *GotFromPc = llvm::cast<ConstantRelocatable>(
-        Ctx->getConstantSym({AfterAddReloc, BeforeAddReloc, ImmSize},
+        Ctx->getConstantSym(ImmSize, {AfterAddReloc, BeforeAddReloc},
                             GlobalOffsetTable, GlobalOffsetTable, true));
 
     // Insert a new version of InstX86GetIP.
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
index 2ff3d7a..797b731 100644
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -453,19 +453,19 @@
 public:
   static void initRegisterSet(
       const ::Ice::ClFlags & /*Flags*/,
-      std::array<llvm::SmallBitVector, RCX86_NUM> *TypeToRegisterSet,
-      std::array<llvm::SmallBitVector, RegisterSet::Reg_NUM> *RegisterAliases) {
-    llvm::SmallBitVector IntegerRegistersI32(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector IntegerRegistersI16(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector IntegerRegistersI8(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector FloatRegisters(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector VectorRegisters(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector Trunc64To8Registers(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector Trunc32To8Registers(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector Trunc16To8Registers(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector Trunc8RcvrRegisters(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector AhRcvrRegisters(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector InvalidRegisters(RegisterSet::Reg_NUM);
+      std::array<SmallBitVector, RCX86_NUM> *TypeToRegisterSet,
+      std::array<SmallBitVector, RegisterSet::Reg_NUM> *RegisterAliases) {
+    SmallBitVector IntegerRegistersI32(RegisterSet::Reg_NUM);
+    SmallBitVector IntegerRegistersI16(RegisterSet::Reg_NUM);
+    SmallBitVector IntegerRegistersI8(RegisterSet::Reg_NUM);
+    SmallBitVector FloatRegisters(RegisterSet::Reg_NUM);
+    SmallBitVector VectorRegisters(RegisterSet::Reg_NUM);
+    SmallBitVector Trunc64To8Registers(RegisterSet::Reg_NUM);
+    SmallBitVector Trunc32To8Registers(RegisterSet::Reg_NUM);
+    SmallBitVector Trunc16To8Registers(RegisterSet::Reg_NUM);
+    SmallBitVector Trunc8RcvrRegisters(RegisterSet::Reg_NUM);
+    SmallBitVector AhRcvrRegisters(RegisterSet::Reg_NUM);
+    SmallBitVector InvalidRegisters(RegisterSet::Reg_NUM);
 
     static constexpr struct {
       uint16_t Val;
@@ -539,11 +539,10 @@
     (*TypeToRegisterSet)[RCX86_IsAhRcvr] = AhRcvrRegisters;
   }
 
-  static llvm::SmallBitVector
-  getRegisterSet(const ::Ice::ClFlags & /*Flags*/,
-                 TargetLowering::RegSetMask Include,
-                 TargetLowering::RegSetMask Exclude) {
-    llvm::SmallBitVector Registers(RegisterSet::Reg_NUM);
+  static SmallBitVector getRegisterSet(const ::Ice::ClFlags & /*Flags*/,
+                                       TargetLowering::RegSetMask Include,
+                                       TargetLowering::RegSetMask Exclude) {
+    SmallBitVector Registers(RegisterSet::Reg_NUM);
 
 #define X(val, encode, name, base, scratch, preserved, stackptr, frameptr,     \
           isGPR, is64, is32, is16, is8, isXmm, is64To8, is32To8, is16To8,      \
@@ -575,7 +574,7 @@
   static void
   makeRandomRegisterPermutation(GlobalContext *Ctx, Cfg *Func,
                                 llvm::SmallVectorImpl<RegNumT> &Permutation,
-                                const llvm::SmallBitVector &ExcludeRegisters,
+                                const SmallBitVector &ExcludeRegisters,
                                 uint64_t Salt) {
     // TODO(stichnot): Declaring Permutation this way loses type/size
     // information. Fix this in conjunction with the caller-side TODO.
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 86956cd..51f6e17 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -103,15 +103,15 @@
 const char *TargetX8664Traits::TargetName = "X8664";
 
 template <>
-std::array<llvm::SmallBitVector, RCX86_NUM>
+std::array<SmallBitVector, RCX86_NUM>
     TargetX86Base<X8664::Traits>::TypeToRegisterSet = {{}};
 
 template <>
-std::array<llvm::SmallBitVector, RCX86_NUM>
+std::array<SmallBitVector, RCX86_NUM>
     TargetX86Base<X8664::Traits>::TypeToRegisterSetUnfiltered = {{}};
 
 template <>
-std::array<llvm::SmallBitVector,
+std::array<SmallBitVector,
            TargetX86Base<X8664::Traits>::Traits::RegisterSet::Reg_NUM>
     TargetX86Base<X8664::Traits>::RegisterAliases = {{}};
 
@@ -626,10 +626,12 @@
     auto *ReturnRelocOffset = RelocOffset::create(Ctx);
     ReturnAddress->setRelocOffset(ReturnRelocOffset);
     constexpr bool SuppressMangling = true;
+    constexpr RelocOffsetT NoFixedOffset = 0;
     const IceString EmitString = ReturnAddress->getName(Func);
-    auto *ReturnReloc = llvm::cast<ConstantRelocatable>(Ctx->getConstantSym(
-        {ReturnRelocOffset}, Ctx->mangleName(Func->getFunctionName()),
-        EmitString, SuppressMangling));
+    auto *ReturnReloc = llvm::cast<ConstantRelocatable>(
+        Ctx->getConstantSym(NoFixedOffset, {ReturnRelocOffset},
+                            Ctx->mangleName(Func->getFunctionName()),
+                            EmitString, SuppressMangling));
     /* AutoBundle scoping */ {
       std::unique_ptr<AutoBundle> Bundler;
       if (CallTargetR == nullptr) {
diff --git a/src/IceTargetLoweringX8664Traits.h b/src/IceTargetLoweringX8664Traits.h
index 0ee1a69..b739f1a 100644
--- a/src/IceTargetLoweringX8664Traits.h
+++ b/src/IceTargetLoweringX8664Traits.h
@@ -481,20 +481,20 @@
 public:
   static void initRegisterSet(
       const ::Ice::ClFlags &Flags,
-      std::array<llvm::SmallBitVector, RCX86_NUM> *TypeToRegisterSet,
-      std::array<llvm::SmallBitVector, RegisterSet::Reg_NUM> *RegisterAliases) {
-    llvm::SmallBitVector IntegerRegistersI64(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector IntegerRegistersI32(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector IntegerRegistersI16(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector IntegerRegistersI8(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector FloatRegisters(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector VectorRegisters(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector Trunc64To8Registers(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector Trunc32To8Registers(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector Trunc16To8Registers(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector Trunc8RcvrRegisters(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector AhRcvrRegisters(RegisterSet::Reg_NUM);
-    llvm::SmallBitVector InvalidRegisters(RegisterSet::Reg_NUM);
+      std::array<SmallBitVector, RCX86_NUM> *TypeToRegisterSet,
+      std::array<SmallBitVector, RegisterSet::Reg_NUM> *RegisterAliases) {
+    SmallBitVector IntegerRegistersI64(RegisterSet::Reg_NUM);
+    SmallBitVector IntegerRegistersI32(RegisterSet::Reg_NUM);
+    SmallBitVector IntegerRegistersI16(RegisterSet::Reg_NUM);
+    SmallBitVector IntegerRegistersI8(RegisterSet::Reg_NUM);
+    SmallBitVector FloatRegisters(RegisterSet::Reg_NUM);
+    SmallBitVector VectorRegisters(RegisterSet::Reg_NUM);
+    SmallBitVector Trunc64To8Registers(RegisterSet::Reg_NUM);
+    SmallBitVector Trunc32To8Registers(RegisterSet::Reg_NUM);
+    SmallBitVector Trunc16To8Registers(RegisterSet::Reg_NUM);
+    SmallBitVector Trunc8RcvrRegisters(RegisterSet::Reg_NUM);
+    SmallBitVector AhRcvrRegisters(RegisterSet::Reg_NUM);
+    SmallBitVector InvalidRegisters(RegisterSet::Reg_NUM);
 
     static constexpr struct {
       uint16_t Val;
@@ -580,11 +580,10 @@
     (*TypeToRegisterSet)[RCX86_IsAhRcvr] = AhRcvrRegisters;
   }
 
-  static llvm::SmallBitVector
-  getRegisterSet(const ::Ice::ClFlags &Flags,
-                 TargetLowering::RegSetMask Include,
-                 TargetLowering::RegSetMask Exclude) {
-    llvm::SmallBitVector Registers(RegisterSet::Reg_NUM);
+  static SmallBitVector getRegisterSet(const ::Ice::ClFlags &Flags,
+                                       TargetLowering::RegSetMask Include,
+                                       TargetLowering::RegSetMask Exclude) {
+    SmallBitVector Registers(RegisterSet::Reg_NUM);
 
     const bool NeedSandboxing = Flags.getUseSandboxing();
 #define X(val, encode, name, base, scratch, preserved, stackptr, frameptr,     \
@@ -619,7 +618,7 @@
   static void
   makeRandomRegisterPermutation(GlobalContext *Ctx, Cfg *Func,
                                 llvm::SmallVectorImpl<RegNumT> &Permutation,
-                                const llvm::SmallBitVector &ExcludeRegisters,
+                                const SmallBitVector &ExcludeRegisters,
                                 uint64_t Salt) {
     // TODO(stichnot): Declaring Permutation this way loses type/size
     // information. Fix this in conjunction with the caller-side TODO.
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index fd8f818..18a69fd 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -116,24 +116,23 @@
       return "i8fromah"; // 8-bit GPR that ah can be assigned to
     }
   }
-  llvm::SmallBitVector getRegisterSet(RegSetMask Include,
-                                      RegSetMask Exclude) const override;
-  const llvm::SmallBitVector &
+  SmallBitVector getRegisterSet(RegSetMask Include,
+                                RegSetMask Exclude) const override;
+  const SmallBitVector &
   getRegistersForVariable(const Variable *Var) const override {
     RegClass RC = Var->getRegClass();
     assert(static_cast<RegClassX86>(RC) < RCX86_NUM);
     return TypeToRegisterSet[RC];
   }
 
-  const llvm::SmallBitVector &
+  const SmallBitVector &
   getAllRegistersForVariable(const Variable *Var) const override {
     RegClass RC = Var->getRegClass();
     assert(static_cast<RegClassX86>(RC) < RCX86_NUM);
     return TypeToRegisterSetUnfiltered[RC];
   }
 
-  const llvm::SmallBitVector &
-  getAliasesForRegister(RegNumT Reg) const override {
+  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
     Reg.assertIsValid();
     return RegisterAliases[Reg];
   }
@@ -266,7 +265,7 @@
   void lowerOther(const Inst *Instr) override;
   void lowerRMW(const InstX86FakeRMW *RMW);
   void prelowerPhis() override;
-  uint32_t getCallStackArgumentsSizeBytes(const std::vector<Type> &ArgTypes,
+  uint32_t getCallStackArgumentsSizeBytes(const CfgVector<Type> &ArgTypes,
                                           Type ReturnType);
   uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
   void genTargetHelperCallFor(Inst *Instr) override;
@@ -437,7 +436,7 @@
 
   void
   makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
-                                const llvm::SmallBitVector &ExcludeRegisters,
+                                const SmallBitVector &ExcludeRegisters,
                                 uint64_t Salt) const override;
 
   /// AutoMemorySandboxer emits a bundle-lock/bundle-unlock pair if the
@@ -985,12 +984,11 @@
   size_t FixedAllocaAlignBytes = 0;
   bool PrologEmitsFixedAllocas = false;
   uint32_t MaxOutArgsSizeBytes = 0;
-  static std::array<llvm::SmallBitVector, RCX86_NUM> TypeToRegisterSet;
-  static std::array<llvm::SmallBitVector, RCX86_NUM>
-      TypeToRegisterSetUnfiltered;
-  static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM>
+  static std::array<SmallBitVector, RCX86_NUM> TypeToRegisterSet;
+  static std::array<SmallBitVector, RCX86_NUM> TypeToRegisterSetUnfiltered;
+  static std::array<SmallBitVector, Traits::RegisterSet::Reg_NUM>
       RegisterAliases;
-  llvm::SmallBitVector RegsUsed;
+  SmallBitVector RegsUsed;
   std::array<VarList, IceType_NUM> PhysicalRegisters;
   // RebasePtr is a Variable that holds the Rebasing pointer (if any) for the
   // current sandboxing type.
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 967eabe..a6bc892 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -152,7 +152,7 @@
   }
   void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
   /// Producers maps Variable::Number to a BoolFoldingEntry.
-  std::unordered_map<SizeT, BoolFoldingEntry<Traits>> Producers;
+  CfgUnorderedMap<SizeT, BoolFoldingEntry<Traits>> Producers;
 };
 
 template <typename Traits>
@@ -962,9 +962,8 @@
   Context.init(Node);
   Context.setInsertPoint(Context.getCur());
 
-  llvm::SmallBitVector CalleeSaves =
-      getRegisterSet(RegSet_CalleeSave, RegSet_None);
-  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
+  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
+  RegsUsed = SmallBitVector(CalleeSaves.size());
   VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
   size_t GlobalsSize = 0;
   // If there is a separate locals area, this represents that area. Otherwise
@@ -1000,7 +999,7 @@
   // Add push instructions for preserved registers.
   uint32_t NumCallee = 0;
   size_t PreservedRegsSizeBytes = 0;
-  llvm::SmallBitVector Pushed(CalleeSaves.size());
+  SmallBitVector Pushed(CalleeSaves.size());
   for (RegNumT i : RegNumBVIter(CalleeSaves)) {
     const auto Canonical = Traits::getBaseReg(i);
     assert(Canonical == Traits::getBaseReg(Canonical));
@@ -1256,9 +1255,8 @@
   }
 
   // Add pop instructions for preserved registers.
-  llvm::SmallBitVector CalleeSaves =
-      getRegisterSet(RegSet_CalleeSave, RegSet_None);
-  llvm::SmallBitVector Popped(CalleeSaves.size());
+  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
+  SmallBitVector Popped(CalleeSaves.size());
   for (int32_t i = CalleeSaves.size() - 1; i >= 0; --i) {
     const auto RegNum = RegNumT::fromInt(i);
     if (RegNum == getFrameReg() && IsEbpBasedFrame)
@@ -1361,7 +1359,7 @@
 }
 
 template <typename TraitsType>
-llvm::SmallBitVector
+SmallBitVector
 TargetX86Base<TraitsType>::getRegisterSet(RegSetMask Include,
                                           RegSetMask Exclude) const {
   return Traits::getRegisterSet(Ctx->getFlags(), Include, Exclude);
@@ -4316,7 +4314,7 @@
   // There might be phi assignments right before the compare+branch, since this
   // could be a backward branch for a loop. This placement of assignments is
   // determined by placePhiStores().
-  std::vector<InstAssign *> PhiAssigns;
+  CfgVector<InstAssign *> PhiAssigns;
   while (auto *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
     if (PhiAssign->getDest() == Dest)
       return false;
@@ -6381,7 +6379,7 @@
       Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
     Cast->setDeleted();
   } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) {
-    std::vector<Type> ArgTypes;
+    CfgVector<Type> ArgTypes;
     Type ReturnType = IceType_void;
     switch (Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicInfo().ID) {
     default:
@@ -6439,7 +6437,7 @@
 
 template <typename TraitsType>
 uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes(
-    const std::vector<Type> &ArgTypes, Type ReturnType) {
+    const CfgVector<Type> &ArgTypes, Type ReturnType) {
   uint32_t OutArgumentsSizeBytes = 0;
   uint32_t XmmArgCount = 0;
   uint32_t GprArgCount = 0;
@@ -6476,8 +6474,10 @@
 uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes(
     const InstCall *Instr) {
   // Build a vector of the arguments' types.
-  std::vector<Type> ArgTypes;
-  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
+  const SizeT NumArgs = Instr->getNumArgs();
+  CfgVector<Type> ArgTypes;
+  ArgTypes.reserve(NumArgs);
+  for (SizeT i = 0; i < NumArgs; ++i) {
     Operand *Arg = Instr->getArg(i);
     ArgTypes.emplace_back(Arg->getType());
   }
@@ -6995,7 +6995,7 @@
 template <typename TraitsType>
 void TargetX86Base<TraitsType>::makeRandomRegisterPermutation(
     llvm::SmallVectorImpl<RegNumT> &Permutation,
-    const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
+    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
   Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation,
                                         ExcludeRegisters, Salt);
 }
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index ed19f61..1fb84c1 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -1355,33 +1355,35 @@
     // done to install a CfgLocalAllocator for various internal containers.
     Func = Ice::Cfg::create(getTranslator().getContext(),
                             getTranslator().getNextSequenceNumber());
-    Ice::Cfg::setCurrentCfg(Func.get());
+    bool ParserResult;
+    {
+      Ice::CfgLocalAllocatorScope _(Func.get());
 
-    // TODO(kschimpf) Clean up API to add a function signature to a CFG.
-    const Ice::FuncSigType &Signature = FuncDecl->getSignature();
+      // TODO(kschimpf) Clean up API to add a function signature to a CFG.
+      const Ice::FuncSigType &Signature = FuncDecl->getSignature();
 
-    Func->setFunctionName(FuncDecl->getName());
-    Func->setReturnType(Signature.getReturnType());
-    Func->setInternal(FuncDecl->getLinkage() == GlobalValue::InternalLinkage);
-    CurrentNode = installNextBasicBlock();
-    Func->setEntryNode(CurrentNode);
-    for (Ice::Type ArgType : Signature.getArgList()) {
-      Func->addArg(getNextInstVar(ArgType));
+      Func->setFunctionName(FuncDecl->getName());
+      Func->setReturnType(Signature.getReturnType());
+      Func->setInternal(FuncDecl->getLinkage() == GlobalValue::InternalLinkage);
+      CurrentNode = installNextBasicBlock();
+      Func->setEntryNode(CurrentNode);
+      for (Ice::Type ArgType : Signature.getArgList()) {
+        Func->addArg(getNextInstVar(ArgType));
+      }
+
+      ParserResult = ParseThisBlock();
+
+      // Temporarily end per-function timing, which will be resumed by the
+      // translator function. This is because translation may be done
+      // asynchronously in a separate thread.
+      if (TimeThisFunction)
+        getTranslator().getContext()->popTimer(TimerID, StackID);
+
+      // Note: Once any errors have been found, we turn off all translation of
+      // all remaining functions. This allows successive parsing errors to be
+      // reported, without adding extra checks to the translator for such
+      // parsing errors.
     }
-
-    bool ParserResult = ParseThisBlock();
-
-    // Temporarily end per-function timing, which will be resumed by the
-    // translator function. This is because translation may be done
-    // asynchronously in a separate thread.
-    if (TimeThisFunction)
-      getTranslator().getContext()->popTimer(TimerID, StackID);
-
-    Ice::Cfg::setCurrentCfg(nullptr);
-    // Note: Once any errors have been found, we turn off all translation of
-    // all remaining functions. This allows successive parsing errors to be
-    // reported, without adding extra checks to the translator for such parsing
-    // errors.
     if (Context->getNumErrors() == 0 && Func) {
       getTranslator().translateFcn(std::move(Func));
       // The translator now has ownership of Func.