Subzero. Introduces a new LoweringContext::insert() method.

Emitting an instruction in Subzero requires a fair amount of
boilerplate code:

Context.insert(<InstType>::create(Func, <Args>...));

The ordeal is worse if one needs access to the recently created
instruction:

auto *Instr = <InstType>::create(Func, <Args>...);
Context.insert(Instr);
Instr->...

This CL introduces a new LoweringContext::insert() method:

template <<InstType>, <Args>...>
<InstType> *LoweringContext::insert(<Args>...) {
  auto *New = <InstType>::create(Node->getCfg(), <Args>...);
  insert(New);
  return New;
}

This is essentially syntactic sugar that allows instructions to be
emitted by using

Context.insert<InstType>(<Args>...);

The compiler should be able to inline the calls (and get rid of the
return value) when appropriate.

make bloat reveals a small increase in translator code size

BUG=
R=sehr@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/1527143003 .
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 045173b..3d139c0 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -23,11 +23,14 @@
 #ifndef SUBZERO_SRC_ICETARGETLOWERING_H
 #define SUBZERO_SRC_ICETARGETLOWERING_H
 
+#include "IceCfgNode.h"
 #include "IceDefs.h"
 #include "IceInst.h" // for the names of the Inst subtypes
 #include "IceOperand.h"
 #include "IceTypes.h"
 
+#include <utility>
+
 namespace Ice {
 
 // UnimplementedError is defined as a macro so that we can get actual line
@@ -72,6 +75,11 @@
   InstList::iterator getNext() const { return Next; }
   InstList::iterator getEnd() const { return End; }
   void insert(Inst *Inst);
+  template <typename Inst, typename... Args> Inst *insert(Args &&... A) {
+    auto *New = Inst::create(Node->getCfg(), std::forward<Args>(A)...);
+    insert(New);
+    return New;
+  }
   Inst *getLastInserted() const;
   void advanceCur() { Cur = Next; }
   void advanceNext() { advanceForward(Next); }
@@ -370,9 +378,9 @@
 
   void
   _bundle_lock(InstBundleLock::Option BundleOption = InstBundleLock::Opt_None) {
-    Context.insert(InstBundleLock::create(Func, BundleOption));
+    Context.insert<InstBundleLock>(BundleOption);
   }
-  void _bundle_unlock() { Context.insert(InstBundleUnlock::create(Func)); }
+  void _bundle_unlock() { Context.insert<InstBundleUnlock>(); }
   void _set_dest_redefined() { Context.getLastInserted()->setDestRedefined(); }
 
   bool shouldOptimizeMemIntrins();
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 1e9a70d..6753c81 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -357,11 +357,10 @@
       assert(TargetHelper != nullptr);
       ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
       constexpr SizeT MaxArgs = 2;
-      auto *Call = InstCall::create(Func, MaxArgs, Dest, TargetHelper,
-                                    NoTailCall, IsTargetHelperCall);
+      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
+                                            NoTailCall, IsTargetHelperCall);
       Call->addArg(Instr->getSrc(0));
       Call->addArg(Instr->getSrc(1));
-      Context.insert(Call);
       Instr->setDeleted();
       return;
     }
@@ -408,7 +407,7 @@
         // Src0 and Src1 have to be zero-, or signed-extended to i32. For Src0,
         // we just insert a InstCast right before the call to the helper.
         Variable *Src0_32 = Func->makeVariable(IceType_i32);
-        Context.insert(InstCast::create(Func, CastKind, Src0_32, Src0));
+        Context.insert<InstCast>(CastKind, Src0_32, Src0);
         Src0 = Src0_32;
 
         // For extending Src1, we will just insert an InstCast if Src1 is not a
@@ -426,20 +425,19 @@
           Src1 = Ctx->getConstantInt32(NewC);
         } else {
           Variable *Src1_32 = Func->makeVariable(IceType_i32);
-          Context.insert(InstCast::create(Func, CastKind, Src1_32, Src1));
+          Context.insert<InstCast>(CastKind, Src1_32, Src1);
           Src1 = Src1_32;
         }
       }
       assert(TargetHelper != nullptr);
       ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
       constexpr SizeT MaxArgs = 2;
-      auto *Call = InstCall::create(Func, MaxArgs, Dest, TargetHelper,
-                                    NoTailCall, IsTargetHelperCall);
+      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
+                                            NoTailCall, IsTargetHelperCall);
       assert(Src0->getType() == IceType_i32);
       Call->addArg(Src0);
       assert(Src1->getType() == IceType_i32);
       Call->addArg(Src1);
-      Context.insert(Call);
       Instr->setDeleted();
       return;
     }
@@ -451,11 +449,10 @@
       constexpr SizeT MaxArgs = 2;
       Operand *TargetHelper = Ctx->getConstantExternSym(
           DestTy == IceType_f32 ? H_frem_f32 : H_frem_f64);
-      auto *Call = InstCall::create(Func, MaxArgs, Dest, TargetHelper,
-                                    NoTailCall, IsTargetHelperCall);
+      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
+                                            NoTailCall, IsTargetHelperCall);
       Call->addArg(Instr->getSrc(0));
       Call->addArg(Instr->getSrc(1));
-      Context.insert(Call);
       Instr->setDeleted();
       return;
     }
@@ -482,10 +479,9 @@
           Src0IsF32 ? (DestIsSigned ? H_fptosi_f32_i64 : H_fptoui_f32_i64)
                     : (DestIsSigned ? H_fptosi_f64_i64 : H_fptoui_f64_i64));
       static constexpr SizeT MaxArgs = 1;
-      auto *Call = InstCall::create(Func, MaxArgs, Dest, TargetHelper,
-                                    NoTailCall, IsTargetHelperCall);
+      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
+                                            NoTailCall, IsTargetHelperCall);
       Call->addArg(Src0);
-      Context.insert(Call);
       Instr->setDeleted();
       return;
     }
@@ -500,10 +496,9 @@
           DestIsF32 ? (SourceIsSigned ? H_sitofp_i64_f32 : H_uitofp_i64_f32)
                     : (SourceIsSigned ? H_sitofp_i64_f64 : H_uitofp_i64_f64));
       static constexpr SizeT MaxArgs = 1;
-      auto *Call = InstCall::create(Func, MaxArgs, Dest, TargetHelper,
-                                    NoTailCall, IsTargetHelperCall);
+      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
+                                            NoTailCall, IsTargetHelperCall);
       Call->addArg(Src0);
-      Context.insert(Call);
       Instr->setDeleted();
       return;
     }
@@ -523,10 +518,9 @@
           isInt32Asserting32Or64(Src0->getType()) ? H_call_ctpop_i32
                                                   : H_call_ctpop_i64);
       static constexpr SizeT MaxArgs = 1;
-      auto *Call = InstCall::create(Func, MaxArgs, Dest, TargetHelper,
-                                    NoTailCall, IsTargetHelperCall);
+      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
+                                            NoTailCall, IsTargetHelperCall);
       Call->addArg(Src0);
-      Context.insert(Call);
       Instr->setDeleted();
       if (Src0->getType() == IceType_i64) {
         ARM32HelpersPostamble[TargetHelper] = &TargetARM32::postambleCtpop64;
@@ -537,11 +531,10 @@
       static constexpr SizeT MaxArgs = 2;
       static constexpr Variable *NoDest = nullptr;
       Operand *TargetHelper = Ctx->getConstantExternSym(H_call_longjmp);
-      auto *Call = InstCall::create(Func, MaxArgs, NoDest, TargetHelper,
-                                    NoTailCall, IsTargetHelperCall);
+      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
+                                            NoTailCall, IsTargetHelperCall);
       Call->addArg(IntrinsicCall->getArg(0));
       Call->addArg(IntrinsicCall->getArg(1));
-      Context.insert(Call);
       Instr->setDeleted();
       return;
     }
@@ -551,12 +544,11 @@
       static constexpr SizeT MaxArgs = 3;
       static constexpr Variable *NoDest = nullptr;
       Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memcpy);
-      auto *Call = InstCall::create(Func, MaxArgs, NoDest, TargetHelper,
-                                    NoTailCall, IsTargetHelperCall);
+      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
+                                            NoTailCall, IsTargetHelperCall);
       Call->addArg(IntrinsicCall->getArg(0));
       Call->addArg(IntrinsicCall->getArg(1));
       Call->addArg(IntrinsicCall->getArg(2));
-      Context.insert(Call);
       Instr->setDeleted();
       return;
     }
@@ -564,12 +556,11 @@
       static constexpr SizeT MaxArgs = 3;
       static constexpr Variable *NoDest = nullptr;
       Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memmove);
-      auto *Call = InstCall::create(Func, MaxArgs, NoDest, TargetHelper,
-                                    NoTailCall, IsTargetHelperCall);
+      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
+                                            NoTailCall, IsTargetHelperCall);
       Call->addArg(IntrinsicCall->getArg(0));
       Call->addArg(IntrinsicCall->getArg(1));
       Call->addArg(IntrinsicCall->getArg(2));
-      Context.insert(Call);
       Instr->setDeleted();
       return;
     }
@@ -579,7 +570,7 @@
       Operand *ValOp = IntrinsicCall->getArg(1);
       assert(ValOp->getType() == IceType_i8);
       Variable *ValExt = Func->makeVariable(stackSlotType());
-      Context.insert(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
+      Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);
 
       // Technically, ARM has its own __aeabi_memset, but we can use plain
       // memset too. The value and size argument need to be flipped if we ever
@@ -587,12 +578,11 @@
       static constexpr SizeT MaxArgs = 3;
       static constexpr Variable *NoDest = nullptr;
       Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memset);
-      auto *Call = InstCall::create(Func, MaxArgs, NoDest, TargetHelper,
-                                    NoTailCall, IsTargetHelperCall);
+      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
+                                            NoTailCall, IsTargetHelperCall);
       Call->addArg(IntrinsicCall->getArg(0));
       Call->addArg(ValExt);
       Call->addArg(IntrinsicCall->getArg(2));
-      Context.insert(Call);
       Instr->setDeleted();
       return;
     }
@@ -602,19 +592,17 @@
       }
       static constexpr SizeT MaxArgs = 0;
       Operand *TargetHelper = Ctx->getConstantExternSym(H_call_read_tp);
-      auto *Call = InstCall::create(Func, MaxArgs, Dest, TargetHelper,
-                                    NoTailCall, IsTargetHelperCall);
-      Context.insert(Call);
+      Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
+                               IsTargetHelperCall);
       Instr->setDeleted();
       return;
     }
     case Intrinsics::Setjmp: {
       static constexpr SizeT MaxArgs = 1;
       Operand *TargetHelper = Ctx->getConstantExternSym(H_call_setjmp);
-      auto *Call = InstCall::create(Func, MaxArgs, Dest, TargetHelper,
-                                    NoTailCall, IsTargetHelperCall);
+      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
+                                            NoTailCall, IsTargetHelperCall);
       Call->addArg(IntrinsicCall->getArg(0));
-      Context.insert(Call);
       Instr->setDeleted();
       return;
     }
@@ -1030,7 +1018,7 @@
           RegARM32::getI64PairSecondGPRNum(RegNum));
     } break;
     }
-    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
+    Context.insert<InstAssign>(Arg, RegisterArg);
   }
 }
 
@@ -1231,7 +1219,7 @@
     Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
     _mov(FP, SP);
     // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
-    Context.insert(InstFakeUse::create(Func, FP));
+    Context.insert<InstFakeUse>(FP);
   }
 
   // Align the variables area. SpillAreaPaddingBytes is the size of the region
@@ -1360,7 +1348,7 @@
     // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
     // use of SP before the assignment of SP=FP keeps previous SP adjustments
     // from being dead-code eliminated.
-    Context.insert(InstFakeUse::create(Func, SP));
+    Context.insert<InstFakeUse>(SP);
     Sandboxer(this).reset_sp(FP);
   } else {
     // add SP, SpillAreaSizeBytes
@@ -1524,7 +1512,7 @@
         .str(SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
              MovInstr->getPredicate());
     // _str() does not have a Dest, so we add a fake-def(Dest).
-    Target->Context.insert(InstFakeDef::create(Target->Func, Dest));
+    Target->Context.insert<InstFakeDef>(Dest);
     Legalized = true;
   } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
     if (Var->isRematerializable()) {
@@ -1899,7 +1887,7 @@
       // value to Dest, as Dest is rematerializable.
       assert(Dest->isRematerializable());
       FixedAllocaSizeBytes += Value;
-      Context.insert(InstFakeDef::create(Func, Dest));
+      Context.insert<InstFakeDef>(Dest);
       return;
     }
 
@@ -1944,7 +1932,7 @@
     Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty));
     Variable *T = makeReg(IceType_i32);
     _lsls(T, SrcLoReg, ShAmtImm);
-    Context.insert(InstFakeUse::create(Func, T));
+    Context.insert<InstFakeUse>(T);
   } break;
   case IceType_i32: {
     _tst(SrcLoReg, SrcLoReg);
@@ -1955,7 +1943,7 @@
     _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
     // T isn't going to be used, but we need the side-effect of setting flags
     // from this operation.
-    Context.insert(InstFakeUse::create(Func, T));
+    Context.insert<InstFakeUse>(T);
   }
   }
   auto *Label = InstARM32Label::create(Func, this);
@@ -2711,7 +2699,7 @@
   Variable *Dest = Instr->getDest();
 
   if (Dest->isRematerializable()) {
-    Context.insert(InstFakeDef::create(Func, Dest));
+    Context.insert<InstFakeDef>(Dest);
     return;
   }
 
@@ -2731,7 +2719,7 @@
   if (isVectorType(DestTy)) {
     // Add a fake def to keep liveness consistent in the meantime.
     Variable *T = makeReg(DestTy);
-    Context.insert(InstFakeDef::create(Func, T));
+    Context.insert<InstFakeDef>(T);
     _mov(Dest, T);
     UnimplementedError(Func->getContext()->getFlags());
     return;
@@ -3063,7 +3051,7 @@
   Variable *Dest = Inst->getDest();
 
   if (Dest->isRematerializable()) {
-    Context.insert(InstFakeDef::create(Func, Dest));
+    Context.insert<InstFakeDef>(Dest);
     return;
   }
 
@@ -3367,29 +3355,28 @@
   // Copy arguments to be passed in registers to the appropriate registers.
   for (auto &FPArg : FPArgs) {
     Variable *Reg = legalizeToReg(FPArg.first, FPArg.second);
-    Context.insert(InstFakeUse::create(Func, Reg));
+    Context.insert<InstFakeUse>(Reg);
   }
   for (auto &GPRArg : GPRArgs) {
     Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second);
     // Generate a FakeUse of register arguments so that they do not get dead
     // code eliminated as a result of the FakeKill of scratch registers after
     // the call.
-    Context.insert(InstFakeUse::create(Func, Reg));
+    Context.insert<InstFakeUse>(Reg);
   }
 
   InstARM32Call *NewCall =
       Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget);
 
   if (ReturnRegHi)
-    Context.insert(InstFakeDef::create(Func, ReturnRegHi));
+    Context.insert<InstFakeDef>(ReturnRegHi);
 
   // Insert a register-kill pseudo instruction.
-  Context.insert(InstFakeKill::create(Func, NewCall));
+  Context.insert<InstFakeKill>(NewCall);
 
   // Generate a FakeUse to keep the call live if necessary.
   if (Instr->hasSideEffects() && ReturnReg) {
-    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
-    Context.insert(FakeUse);
+    Context.insert<InstFakeUse>(ReturnReg);
   }
 
   if (Dest != nullptr) {
@@ -3440,7 +3427,7 @@
   case InstCast::Sext: {
     if (isVectorType(Dest->getType())) {
       Variable *T = makeReg(Dest->getType());
-      Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
+      Context.insert<InstFakeDef>(T, legalizeToReg(Src0));
       _mov(Dest, T);
       UnimplementedError(Func->getContext()->getFlags());
     } else if (Dest->getType() == IceType_i64) {
@@ -3488,7 +3475,7 @@
   case InstCast::Zext: {
     if (isVectorType(Dest->getType())) {
       Variable *T = makeReg(Dest->getType());
-      Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
+      Context.insert<InstFakeDef>(T, legalizeToReg(Src0));
       _mov(Dest, T);
       UnimplementedError(Func->getContext()->getFlags());
     } else if (Dest->getType() == IceType_i64) {
@@ -3544,7 +3531,7 @@
   case InstCast::Trunc: {
     if (isVectorType(Dest->getType())) {
       Variable *T = makeReg(Dest->getType());
-      Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
+      Context.insert<InstFakeDef>(T, legalizeToReg(Src0));
       _mov(Dest, T);
       UnimplementedError(Func->getContext()->getFlags());
     } else {
@@ -3567,7 +3554,7 @@
     const bool IsTrunc = CastKind == InstCast::Fptrunc;
     if (isVectorType(Dest->getType())) {
       Variable *T = makeReg(Dest->getType());
-      Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
+      Context.insert<InstFakeDef>(T, legalizeToReg(Src0));
       _mov(Dest, T);
       UnimplementedError(Func->getContext()->getFlags());
       break;
@@ -3584,7 +3571,7 @@
   case InstCast::Fptoui: {
     if (isVectorType(Dest->getType())) {
       Variable *T = makeReg(Dest->getType());
-      Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
+      Context.insert<InstFakeDef>(T, legalizeToReg(Src0));
       _mov(Dest, T);
       UnimplementedError(Func->getContext()->getFlags());
       break;
@@ -3623,7 +3610,7 @@
   case InstCast::Uitofp: {
     if (isVectorType(Dest->getType())) {
       Variable *T = makeReg(Dest->getType());
-      Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
+      Context.insert<InstFakeDef>(T, legalizeToReg(Src0));
       _mov(Dest, T);
       UnimplementedError(Func->getContext()->getFlags());
       break;
@@ -3700,8 +3687,8 @@
       configureBitcastTemporary(T);
       Variable *Src0R = legalizeToReg(Src0);
       _mov(T, Src0R);
-      Context.insert(InstFakeUse::create(Func, T->getHi()));
-      Context.insert(InstFakeUse::create(Func, T->getLo()));
+      Context.insert<InstFakeUse>(T->getHi());
+      Context.insert<InstFakeUse>(T->getLo());
       lowerAssign(InstAssign::create(Func, Dest, T));
       break;
     }
@@ -3729,7 +3716,7 @@
     case IceType_v4i32: {
       // avoid liveness errors
       Variable *T = makeReg(DestType);
-      Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
+      Context.insert<InstFakeDef>(T, legalizeToReg(Src0));
       _mov(Dest, T);
       UnimplementedError(Func->getContext()->getFlags());
       break;
@@ -3744,7 +3731,7 @@
   Variable *Dest = Inst->getDest();
   Type DestType = Dest->getType();
   Variable *T = makeReg(DestType);
-  Context.insert(InstFakeDef::create(Func, T));
+  Context.insert<InstFakeDef>(T);
   _mov(Dest, T);
   UnimplementedError(Func->getContext()->getFlags());
 }
@@ -3826,7 +3813,7 @@
   Variable *Dest = Instr->getDest();
   if (isVectorType(Dest->getType())) {
     Variable *T = makeReg(Dest->getType());
-    Context.insert(InstFakeDef::create(Func, T));
+    Context.insert<InstFakeDef>(T);
     _mov(Dest, T);
     UnimplementedError(Func->getContext()->getFlags());
     return;
@@ -3884,7 +3871,7 @@
       Variable *Src0LoR = SrcsLo.src0R(this);
       Variable *Src0HiR = SrcsHi.src0R(this);
       _orrs(T, Src0LoR, Src0HiR);
-      Context.insert(InstFakeUse::create(Func, T));
+      Context.insert<InstFakeUse>(T);
       return CondWhenTrue(TableIcmp64[Index].C1);
     }
 
@@ -3899,29 +3886,29 @@
       if (TableIcmp64[Index].IsSigned) {
         Variable *T = makeReg(IceType_i32);
         _rsbs(T, Src0RLo, Src1RFLo);
-        Context.insert(InstFakeUse::create(Func, T));
+        Context.insert<InstFakeUse>(T);
 
         T = makeReg(IceType_i32);
         _rscs(T, Src0RHi, Src1RFHi);
         // We need to add a FakeUse here because liveness gets mad at us (Def
         // without Use.) Note that flag-setting instructions are considered to
         // have side effects and, therefore, are not DCE'ed.
-        Context.insert(InstFakeUse::create(Func, T));
+        Context.insert<InstFakeUse>(T);
       } else {
         Variable *T = makeReg(IceType_i32);
         _rsbs(T, Src0RHi, Src1RFHi);
-        Context.insert(InstFakeUse::create(Func, T));
+        Context.insert<InstFakeUse>(T);
 
         T = makeReg(IceType_i32);
         _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
-        Context.insert(InstFakeUse::create(Func, T));
+        Context.insert<InstFakeUse>(T);
       }
     } else {
       if (TableIcmp64[Index].IsSigned) {
         _cmp(Src0RLo, Src1RFLo);
         Variable *T = makeReg(IceType_i32);
         _sbcs(T, Src0RHi, Src1RFHi);
-        Context.insert(InstFakeUse::create(Func, T));
+        Context.insert<InstFakeUse>(T);
       } else {
         _cmp(Src0RHi, Src1RFHi);
         _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
@@ -3980,7 +3967,7 @@
     _sbcs(ScratchReg, Src0RHi, Src1RFHi);
     // ScratchReg isn't going to be used, but we need the side-effect of
     // setting flags from this operation.
-    Context.insert(InstFakeUse::create(Func, ScratchReg));
+    Context.insert<InstFakeUse>(ScratchReg);
   } else {
     _cmp(Src0RHi, Src1RFHi);
     _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
@@ -4020,7 +4007,7 @@
   } else {
     Variable *T = makeReg(IceType_i32);
     _rsbs(T, Src0R, Src1RF);
-    Context.insert(InstFakeUse::create(Func, T));
+    Context.insert<InstFakeUse>(T);
   }
   return CondWhenTrue(getIcmp32Mapping(Condition));
 }
@@ -4049,7 +4036,7 @@
     Operand *ShAmtImm = shAmtImm(ShAmt);
     Variable *T = makeReg(IceType_i32);
     _lsls(T, Srcs.src0R(this), ShAmtImm);
-    Context.insert(InstFakeUse::create(Func, T));
+    Context.insert<InstFakeUse>(T);
     return CondWhenTrue(getIcmp32Mapping(Condition));
   }
 
@@ -4065,7 +4052,7 @@
   } else {
     Variable *T = makeReg(IceType_i32);
     _rsbs(T, ConstR, NonConstF);
-    Context.insert(InstFakeUse::create(Func, T));
+    Context.insert<InstFakeUse>(T);
   }
   return CondWhenTrue(getIcmp32Mapping(Condition));
 }
@@ -4125,7 +4112,7 @@
 
   if (isVectorType(Dest->getType())) {
     Variable *T = makeReg(Dest->getType());
-    Context.insert(InstFakeDef::create(Func, T));
+    Context.insert<InstFakeDef>(T);
     _mov(Dest, T);
     UnimplementedError(Func->getContext()->getFlags());
     return;
@@ -4219,7 +4206,7 @@
   }
 
   if (DestTy == IceType_i64) {
-    Context.insert(InstFakeDef::create(Func, Value));
+    Context.insert<InstFakeDef>(Value);
   }
   lowerAssign(InstAssign::create(Func, Value, Val));
 
@@ -4230,7 +4217,7 @@
   Context.insert(Retry);
   Mem = formMemoryOperand(PtrVar, DestTy);
   if (DestTy == IceType_i64) {
-    Context.insert(InstFakeDef::create(Func, ValueReg, Value));
+    Context.insert<InstFakeDef>(ValueReg, Value);
   }
   lowerAssign(InstAssign::create(Func, ValueReg, Value));
   if (DestTy == IceType_i8 || DestTy == IceType_i16) {
@@ -4239,7 +4226,7 @@
   _ldrex(PtrContentsReg, Mem);
 
   if (DestTy == IceType_i64) {
-    Context.insert(InstFakeDef::create(Func, TmpReg, ValueReg));
+    Context.insert<InstFakeDef>(TmpReg, ValueReg);
   }
   switch (Operation) {
   default:
@@ -4293,12 +4280,12 @@
   // The following fake-uses ensure that Subzero will not clobber them in the
   // load-linked/store-conditional loop above. We might have to spill them, but
   // spilling is preferable over incorrect behavior.
-  Context.insert(InstFakeUse::create(Func, PtrVar));
+  Context.insert<InstFakeUse>(PtrVar);
   if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
-    Context.insert(InstFakeUse::create(Func, Value64->getHi()));
-    Context.insert(InstFakeUse::create(Func, Value64->getLo()));
+    Context.insert<InstFakeUse>(Value64->getHi());
+    Context.insert<InstFakeUse>(Value64->getLo());
   } else {
-    Context.insert(InstFakeUse::create(Func, Value));
+    Context.insert<InstFakeUse>(Value);
   }
   _dmb();
   if (DestTy == IceType_i8 || DestTy == IceType_i16) {
@@ -4306,14 +4293,14 @@
   }
 
   if (DestTy == IceType_i64) {
-    Context.insert(InstFakeUse::create(Func, PtrContentsReg));
+    Context.insert<InstFakeUse>(PtrContentsReg);
   }
   lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg));
   if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
-    Context.insert(InstFakeUse::create(Func, Dest64->getLo()));
-    Context.insert(InstFakeUse::create(Func, Dest64->getHi()));
+    Context.insert<InstFakeUse>(Dest64->getLo());
+    Context.insert<InstFakeUse>(Dest64->getHi());
   } else {
-    Context.insert(InstFakeUse::create(Func, Dest));
+    Context.insert<InstFakeUse>(Dest);
   }
 }
 
@@ -4391,8 +4378,7 @@
     // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
     // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
     // the FakeUse on the last-inserted instruction's dest.
-    Context.insert(
-        InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
+    Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
     return;
   }
   case Intrinsics::AtomicStore: {
@@ -4438,21 +4424,20 @@
       lowerAssign(InstAssign::create(Func, AddrVar, Addr));
 
       Context.insert(Retry);
-      Context.insert(InstFakeDef::create(Func, NewReg));
+      Context.insert<InstFakeDef>(NewReg);
       lowerAssign(InstAssign::create(Func, NewReg, ValueVar));
       Mem = formMemoryOperand(AddrVar, IceType_i64);
       _ldrex(Tmp, Mem);
       // This fake-use both prevents the ldrex from being dead-code eliminated,
       // while also keeping liveness happy about all defs being used.
-      Context.insert(
-          InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
+      Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
       _strex(Success, NewReg, Mem);
       _cmp(Success, _0);
       _br(Retry, CondARM32::NE);
 
-      Context.insert(InstFakeUse::create(Func, ValueVar->getLo()));
-      Context.insert(InstFakeUse::create(Func, ValueVar->getHi()));
-      Context.insert(InstFakeUse::create(Func, AddrVar));
+      Context.insert<InstFakeUse>(ValueVar->getLo());
+      Context.insert<InstFakeUse>(ValueVar->getHi());
+      Context.insert<InstFakeUse>(AddrVar);
       _dmb();
       return;
     }
@@ -4550,35 +4535,34 @@
 
     Mem = formMemoryOperand(Instr->getArg(0), DestTy);
     if (DestTy == IceType_i64) {
-      Context.insert(InstFakeDef::create(Func, Expected));
+      Context.insert<InstFakeDef>(Expected);
     }
     lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
     if (DestTy == IceType_i64) {
-      Context.insert(InstFakeDef::create(Func, New));
+      Context.insert<InstFakeDef>(New);
     }
     lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
     _dmb();
 
     Context.insert(Retry);
     if (DestTy == IceType_i64) {
-      Context.insert(InstFakeDef::create(Func, ExpectedReg, Expected));
+      Context.insert<InstFakeDef>(ExpectedReg, Expected);
     }
     lowerAssign(InstAssign::create(Func, ExpectedReg, Expected));
     if (DestTy == IceType_i64) {
-      Context.insert(InstFakeDef::create(Func, NewReg, New));
+      Context.insert<InstFakeDef>(NewReg, New);
     }
     lowerAssign(InstAssign::create(Func, NewReg, New));
 
     _ldrex(TmpReg, Mem);
-    Context.insert(
-        InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
+    Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
     if (DestTy == IceType_i64) {
       auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
       auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg);
       // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's
       // keep liveness happy, shall we?
-      Context.insert(InstFakeUse::create(Func, TmpReg));
-      Context.insert(InstFakeUse::create(Func, ExpectedReg));
+      Context.insert<InstFakeUse>(TmpReg);
+      Context.insert<InstFakeUse>(ExpectedReg);
       _cmp(TmpReg64->getHi(), ExpectedReg64->getHi());
       _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ);
     } else {
@@ -4590,9 +4574,8 @@
       auto *Expected64 = llvm::cast<Variable64On32>(Expected);
       _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE);
       _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE);
-      auto *FakeDef = InstFakeDef::create(Func, Expected, TmpReg);
-      Context.insert(FakeDef);
-      FakeDef->setDestRedefined();
+      Context.insert<InstFakeDef>(Expected, TmpReg);
+      _set_dest_redefined();
     } else {
       _mov_redefined(Expected, TmpReg, CondARM32::NE);
     }
@@ -4600,12 +4583,12 @@
     _br(Retry, CondARM32::NE);
     _dmb();
     lowerAssign(InstAssign::create(Func, Dest, Expected));
-    Context.insert(InstFakeUse::create(Func, Expected));
+    Context.insert<InstFakeUse>(Expected);
     if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) {
-      Context.insert(InstFakeUse::create(Func, New64->getLo()));
-      Context.insert(InstFakeUse::create(Func, New64->getHi()));
+      Context.insert<InstFakeUse>(New64->getLo());
+      Context.insert<InstFakeUse>(New64->getHi());
     } else {
-      Context.insert(InstFakeUse::create(Func, New));
+      Context.insert<InstFakeUse>(New);
     }
     return;
   }
@@ -4697,7 +4680,7 @@
     Variable *T = makeReg(DestTy);
     if (isVectorType(DestTy)) {
       // Add a fake def to keep liveness consistent in the meantime.
-      Context.insert(InstFakeDef::create(Func, T));
+      Context.insert<InstFakeDef>(T);
       _mov(Dest, T);
       UnimplementedError(Func->getContext()->getFlags());
       return;
@@ -5162,8 +5145,7 @@
     //
     const Type PointerType = getPointerType();
     BaseVar = makeReg(PointerType);
-    Context.insert(
-        InstAssign::create(Func, BaseVar, Ctx->getConstantInt32(OffsetImm)));
+    Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
     OffsetImm = 0;
   } else if (OffsetImm != 0) {
     // ARM Ldr/Str instructions have limited range immediates. The formation
@@ -5194,8 +5176,8 @@
       //      use of [T, Offset {, LSL amount}]
       const Type PointerType = getPointerType();
       Variable *T = makeReg(PointerType);
-      Context.insert(InstArithmetic::create(
-          Func, Op, T, BaseVar, Ctx->getConstantInt32(PositiveOffset)));
+      Context.insert<InstArithmetic>(Op, T, BaseVar,
+                                     Ctx->getConstantInt32(PositiveOffset));
       BaseVar = T;
       OffsetImm = 0;
     }
@@ -5209,7 +5191,7 @@
 
   if (OffsetReg != nullptr) {
     Variable *OffsetR = makeReg(getPointerType());
-    Context.insert(InstAssign::create(Func, OffsetR, OffsetReg));
+    Context.insert<InstAssign>(OffsetR, OffsetReg);
     return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetR, ShiftKind,
                                    OffsetRegShamt);
   }
@@ -5227,7 +5209,7 @@
   if (OperandARM32Mem *Mem =
           formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
     Instr->setDeleted();
-    Context.insert(InstLoad::create(Func, Dest, Mem));
+    Context.insert<InstLoad>(Dest, Mem);
   }
 }
 
@@ -5253,7 +5235,7 @@
       Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
       Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
       Reg = R0;
-      Context.insert(InstFakeUse::create(Func, R1));
+      Context.insert<InstFakeUse>(R1);
     } else if (Ty == IceType_f32) {
       Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
       Reg = S0;
@@ -5280,7 +5262,7 @@
   // TODO: Are there more places where the fake use should be inserted? E.g.
   // "void f(int n){while(1) g(n);}" may not have a ret instruction.
   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
-  Context.insert(InstFakeUse::create(Func, SP));
+  Context.insert<InstFakeUse>(SP);
 }
 
 void TargetARM32::lowerSelect(const InstSelect *Inst) {
@@ -5292,7 +5274,7 @@
 
   if (isVectorType(DestTy)) {
     Variable *T = makeReg(DestTy);
-    Context.insert(InstFakeDef::create(Func, T));
+    Context.insert<InstFakeDef>(T);
     _mov(Dest, T);
     UnimplementedError(Func->getContext()->getFlags());
     return;
@@ -5327,7 +5309,7 @@
   if (OperandARM32Mem *Mem =
           formAddressingMode(Src->getType(), Func, Instr, Addr)) {
     Instr->setDeleted();
-    Context.insert(InstStore::create(Func, Src, Mem));
+    Context.insert<InstStore>(Src, Mem);
   }
 }
 
@@ -5385,7 +5367,7 @@
 
 Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
   Variable *Reg = makeReg(Ty, RegNum);
-  Context.insert(InstFakeDef::create(Func, Reg));
+  Context.insert<InstFakeDef>(Reg);
   UnimplementedError(Func->getContext()->getFlags());
   return Reg;
 }
@@ -5559,7 +5541,7 @@
         // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
         // because ARM does not have a veor instruction with S registers.
         Variable *T = makeReg(IceType_f64, RegNum);
-        Context.insert(InstFakeDef::create(Func, T));
+        Context.insert<InstFakeDef>(T);
         _veor(T, T, T);
         return T;
       }
@@ -6187,9 +6169,7 @@
                    indirectBranchBicMask(Target->Func));
     }
   }
-  auto *Call = InstARM32Call::create(Target->Func, ReturnReg, CallTarget);
-  Target->Context.insert(Call);
-  return Call;
+  return Target->Context.insert<InstARM32Call>(ReturnReg, CallTarget);
 }
 
 void TargetARM32::Sandboxer::ldr(Variable *Dest, OperandARM32Mem *Mem,
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 469c7cf..306aaa9 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -277,60 +277,56 @@
   // assembly as practical.
   void _add(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Add::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
   }
   void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
              CondARM32::Cond Pred = CondARM32::AL) {
     constexpr bool SetFlags = true;
-    Context.insert(
-        InstARM32Add::create(Func, Dest, Src0, Src1, Pred, SetFlags));
+    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
   }
   void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Adc::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
   }
   void _and(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32And::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
   }
   void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Asr::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
   }
   void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Bic::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
   }
   void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
            CondARM32::Cond Condition) {
-    Context.insert(
-        InstARM32Br::create(Func, TargetTrue, TargetFalse, Condition));
+    Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
   }
-  void _br(CfgNode *Target) {
-    Context.insert(InstARM32Br::create(Func, Target));
-  }
+  void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
   void _br(CfgNode *Target, CondARM32::Cond Condition) {
-    Context.insert(InstARM32Br::create(Func, Target, Condition));
+    Context.insert<InstARM32Br>(Target, Condition);
   }
   void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
-    Context.insert(InstARM32Br::create(Func, Label, Condition));
+    Context.insert<InstARM32Br>(Label, Condition);
   }
   void _cmn(Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Cmn::create(Func, Src0, Src1, Pred));
+    Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
   }
   void _cmp(Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred));
+    Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
   }
   void _clz(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Clz::create(Func, Dest, Src0, Pred));
+    Context.insert<InstARM32Clz>(Dest, Src0, Pred);
   }
-  void _dmb() { Context.insert(InstARM32Dmb::create(Func)); }
+  void _dmb() { Context.insert<InstARM32Dmb>(); }
   void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Eor::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
   }
   /// _ldr, for all your memory to Variable data moves. It handles all types
   /// (integer, floating point, and vectors.) Addr needs to be valid for Dest's
@@ -338,37 +334,36 @@
   /// loads.)
   void _ldr(Variable *Dest, OperandARM32Mem *Addr,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Ldr::create(Func, Dest, Addr, Pred));
+    Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
   }
   void _ldrex(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Ldrex::create(Func, Dest, Addr, Pred));
+    Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
     if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
-      Context.insert(InstFakeDef::create(Func, Dest64->getLo(), Dest));
-      Context.insert(InstFakeDef::create(Func, Dest64->getHi(), Dest));
+      Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
+      Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
     }
   }
   void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Lsl::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
   }
   void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
              CondARM32::Cond Pred = CondARM32::AL) {
     constexpr bool SetFlags = true;
-    Context.insert(
-        InstARM32Lsl::create(Func, Dest, Src0, Src1, Pred, SetFlags));
+    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
   }
   void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Lsr::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
   }
   void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Mla::create(Func, Dest, Src0, Src1, Acc, Pred));
+    Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
   }
   void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred));
+    Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
   }
   /// _mov, for all your Variable to Variable data movement needs. It handles
   /// all types (integer, floating point, and vectors), as well as moves between
@@ -382,27 +377,25 @@
     // is nullptr.
     assert(Dest != nullptr);
     assert(!llvm::isa<OperandARM32Mem>(Src0));
-    auto *Instr = InstARM32Mov::create(Func, Dest, Src0, Pred);
+    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
 
-    Context.insert(Instr);
     if (Instr->isMultiDest()) {
       // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
       // fake-def for Instr.DestHi here.
       assert(llvm::isa<Variable64On32>(Dest));
-      Context.insert(InstFakeDef::create(Func, Instr->getDestHi()));
+      Context.insert<InstFakeDef>(Instr->getDestHi());
     }
   }
 
   void _mov_redefined(Variable *Dest, Operand *Src0,
                       CondARM32::Cond Pred = CondARM32::AL) {
-    auto *Instr = InstARM32Mov::create(Func, Dest, Src0, Pred);
+    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
     Instr->setDestRedefined();
-    Context.insert(Instr);
     if (Instr->isMultiDest()) {
       // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
       // fake-def for Instr.DestHi here.
       assert(llvm::isa<Variable64On32>(Dest));
-      Context.insert(InstFakeDef::create(Func, Instr->getDestHi()));
+      Context.insert<InstFakeDef>(Instr->getDestHi());
     }
   }
 
@@ -637,182 +630,173 @@
   /// an upper16 relocation).
   void _movt(Variable *Dest, Operand *Src0,
              CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Movt::create(Func, Dest, Src0, Pred));
+    Context.insert<InstARM32Movt>(Dest, Src0, Pred);
   }
   void _movw(Variable *Dest, Operand *Src0,
              CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Movw::create(Func, Dest, Src0, Pred));
+    Context.insert<InstARM32Movw>(Dest, Src0, Pred);
   }
   void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Mul::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
   }
   void _mvn(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Mvn::create(Func, Dest, Src0, Pred));
+    Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
   }
   void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Orr::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
   }
   void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
              CondARM32::Cond Pred = CondARM32::AL) {
     constexpr bool SetFlags = true;
-    Context.insert(
-        InstARM32Orr::create(Func, Dest, Src0, Src1, Pred, SetFlags));
+    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
   }
-  void _push(const VarList &Sources) {
-    Context.insert(InstARM32Push::create(Func, Sources));
-  }
+  void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
   void _pop(const VarList &Dests) {
-    Context.insert(InstARM32Pop::create(Func, Dests));
+    Context.insert<InstARM32Pop>(Dests);
     // Mark dests as modified.
     for (Variable *Dest : Dests)
-      Context.insert(InstFakeDef::create(Func, Dest));
+      Context.insert<InstFakeDef>(Dest);
   }
   void _rbit(Variable *Dest, Variable *Src0,
              CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Rbit::create(Func, Dest, Src0, Pred));
+    Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
   }
   void _rev(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Rev::create(Func, Dest, Src0, Pred));
+    Context.insert<InstARM32Rev>(Dest, Src0, Pred);
   }
   void _ret(Variable *LR, Variable *Src0 = nullptr) {
-    Context.insert(InstARM32Ret::create(Func, LR, Src0));
+    Context.insert<InstARM32Ret>(LR, Src0);
   }
   void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
              CondARM32::Cond Pred = CondARM32::AL) {
     constexpr bool SetFlags = true;
-    Context.insert(
-        InstARM32Rsc::create(Func, Dest, Src0, Src1, Pred, SetFlags));
+    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
   }
   void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Rsc::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
   }
   void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
              CondARM32::Cond Pred = CondARM32::AL) {
     constexpr bool SetFlags = true;
-    Context.insert(
-        InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred, SetFlags));
+    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
   }
   void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
   }
   void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
   }
   void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
              CondARM32::Cond Pred = CondARM32::AL) {
     constexpr bool SetFlags = true;
-    Context.insert(
-        InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags));
+    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
   }
   void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
              CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Sdiv::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
   }
   /// _str, for all your Variable to memory transfers. Addr has the same
   /// restrictions that it does in _ldr.
   void _str(Variable *Value, OperandARM32Mem *Addr,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Str::create(Func, Value, Addr, Pred));
+    Context.insert<InstARM32Str>(Value, Addr, Pred);
   }
   void _strex(Variable *Dest, Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
     // strex requires Dest to be a register other than Value or Addr. This
     // restriction is cleanly represented by adding an "early" definition of
     // Dest (or a latter use of all the sources.)
-    Context.insert(InstFakeDef::create(Func, Dest));
+    Context.insert<InstFakeDef>(Dest);
     if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
-      Context.insert(InstFakeUse::create(Func, Value64->getLo()));
-      Context.insert(InstFakeUse::create(Func, Value64->getHi()));
+      Context.insert<InstFakeUse>(Value64->getLo());
+      Context.insert<InstFakeUse>(Value64->getHi());
     }
-    auto *Instr = InstARM32Strex::create(Func, Dest, Value, Addr, Pred);
-    Context.insert(Instr);
+    auto *Instr = Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
     Instr->setDestRedefined();
   }
   void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Sub::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
   }
   void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
              CondARM32::Cond Pred = CondARM32::AL) {
     constexpr bool SetFlags = true;
-    Context.insert(
-        InstARM32Sub::create(Func, Dest, Src0, Src1, Pred, SetFlags));
+    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
   }
   void _sxt(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Sxt::create(Func, Dest, Src0, Pred));
+    Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
   }
   void _tst(Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Tst::create(Func, Src0, Src1, Pred));
+    Context.insert<InstARM32Tst>(Src0, Src1, Pred);
   }
-  void _trap() { Context.insert(InstARM32Trap::create(Func)); }
+  void _trap() { Context.insert<InstARM32Trap>(); }
   void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
              CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Udiv::create(Func, Dest, Src0, Src1, Pred));
+    Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
   }
   void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
               Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(
-        InstARM32Umull::create(Func, DestLo, DestHi, Src0, Src1, Pred));
+    Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
     // Model the modification to the second dest as a fake def. Note that the
     // def is not predicated.
-    Context.insert(InstFakeDef::create(Func, DestHi, DestLo));
+    Context.insert<InstFakeDef>(DestHi, DestLo);
   }
   void _uxt(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred));
+    Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
   }
   void _vabs(Variable *Dest, Variable *Src,
              CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Vabs::create(Func, Dest, Src, Pred));
+    Context.insert<InstARM32Vabs>(Dest, Src, Pred);
   }
   void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstARM32Vadd::create(Func, Dest, Src0, Src1));
+    Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
   }
   void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
              CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Vcvt::create(Func, Dest, Src, Variant, Pred));
+    Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
   }
   void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstARM32Vdiv::create(Func, Dest, Src0, Src1));
+    Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
   }
   void _vcmp(Variable *Src0, Variable *Src1,
              CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Vcmp::create(Func, Src0, Src1, Pred));
+    Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
   }
   void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
              CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Vcmp::create(Func, Src0, FpZero, Pred));
+    Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
   }
   void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstARM32Veor::create(Func, Dest, Src0, Src1));
+    Context.insert<InstARM32Veor>(Dest, Src0, Src1);
   }
   void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Vmrs::create(Func, Pred));
+    Context.insert<InstARM32Vmrs>(Pred);
   }
   void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstARM32Vmla::create(Func, Dest, Src0, Src1));
+    Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
   }
   void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstARM32Vmls::create(Func, Dest, Src0, Src1));
+    Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
   }
   void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1));
+    Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
   }
   void _vsqrt(Variable *Dest, Variable *Src,
               CondARM32::Cond Pred = CondARM32::AL) {
-    Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred));
+    Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
   }
   void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstARM32Vsub::create(Func, Dest, Src0, Src1));
+    Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
   }
 
   // Iterates over the CFG and determines the maximum outgoing stack arguments
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 2295dfb..391a4e9 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -388,7 +388,7 @@
       RegisterArg64On32->getHi()->setRegNum(RegHi);
       Arg->setIsArg(false);
       Args[I] = RegisterArg64On32;
-      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
+      Context.insert<InstAssign>(Arg, RegisterArg);
       continue;
     } else {
       assert(Ty == IceType_i32);
@@ -404,7 +404,7 @@
       RegisterArg->setIsArg();
       Arg->setIsArg(false);
       Args[I] = RegisterArg;
-      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
+      Context.insert<InstAssign>(Arg, RegisterArg);
     }
   }
 }
@@ -533,13 +533,13 @@
     // TODO(reed kotler): fakedef needed for now until all cases are implemented
     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
-    Context.insert(InstFakeDef::create(Func, DestLo));
-    Context.insert(InstFakeDef::create(Func, DestHi));
+    Context.insert<InstFakeDef>(DestLo);
+    Context.insert<InstFakeDef>(DestHi);
     UnimplementedError(Func->getContext()->getFlags());
     return;
   }
   if (isVectorType(Dest->getType())) {
-    Context.insert(InstFakeDef::create(Func, Dest));
+    Context.insert<InstFakeDef>(Dest);
     UnimplementedError(Func->getContext()->getFlags());
     return;
   }
@@ -602,9 +602,9 @@
   }
   // TODO(reed kotler):
   // fakedef and fakeuse needed for now until all cases are implemented
-  Context.insert(InstFakeUse::create(Func, Src0R));
-  Context.insert(InstFakeUse::create(Func, Src1R));
-  Context.insert(InstFakeDef::create(Func, Dest));
+  Context.insert<InstFakeUse>(Src0R);
+  Context.insert<InstFakeUse>(Src1R);
+  Context.insert<InstFakeDef>(Dest);
   UnimplementedError(Func->getContext()->getFlags());
 }
 
@@ -888,7 +888,7 @@
       Variable *R0 = legalizeToReg(loOperand(Src0), RegMIPS32::Reg_V0);
       Variable *R1 = legalizeToReg(hiOperand(Src0), RegMIPS32::Reg_V1);
       Reg = R0;
-      Context.insert(InstFakeUse::create(Func, R1));
+      Context.insert<InstFakeUse>(R1);
       break;
     }
 
@@ -1022,7 +1022,7 @@
     (void)C;
     // TODO(reed kotler): complete this case for proper implementation
     Variable *Reg = makeReg(Ty, RegNum);
-    Context.insert(InstFakeDef::create(Func, Reg));
+    Context.insert<InstFakeDef>(Reg);
     return Reg;
   } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
     uint32_t Value = static_cast<uint32_t>(C32->getValue());
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index bf96661..60c62dd 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -116,60 +116,59 @@
   // minimal syntactic overhead, so that the lowering code can look as close to
   // assembly as practical.
   void _add(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstMIPS32Add::create(Func, Dest, Src0, Src1));
+    Context.insert<InstMIPS32Add>(Dest, Src0, Src1);
   }
 
   void _and(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstMIPS32And::create(Func, Dest, Src0, Src1));
+    Context.insert<InstMIPS32And>(Dest, Src0, Src1);
   }
 
   void _ret(Variable *RA, Variable *Src0 = nullptr) {
-    Context.insert(InstMIPS32Ret::create(Func, RA, Src0));
+    Context.insert<InstMIPS32Ret>(RA, Src0);
   }
 
   void _addiu(Variable *Dest, Variable *Src, uint32_t Imm) {
-    Context.insert(InstMIPS32Addiu::create(Func, Dest, Src, Imm));
+    Context.insert<InstMIPS32Addiu>(Dest, Src, Imm);
   }
 
   void _lui(Variable *Dest, uint32_t Imm) {
-    Context.insert(InstMIPS32Lui::create(Func, Dest, Imm));
+    Context.insert<InstMIPS32Lui>(Dest, Imm);
   }
 
   void _mov(Variable *Dest, Operand *Src0) {
     assert(Dest != nullptr);
     // Variable* Src0_ = llvm::dyn_cast<Variable>(Src0);
     if (llvm::isa<ConstantRelocatable>(Src0)) {
-      Context.insert(InstMIPS32La::create(Func, Dest, Src0));
+      Context.insert<InstMIPS32La>(Dest, Src0);
     } else {
-      auto *Instr = InstMIPS32Mov::create(Func, Dest, Src0);
-      Context.insert(Instr);
+      auto *Instr = Context.insert<InstMIPS32Mov>(Dest, Src0);
       if (Instr->isMultiDest()) {
         // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
         // fake-def for Instr.DestHi here.
         assert(llvm::isa<Variable64On32>(Dest));
-        Context.insert(InstFakeDef::create(Func, Instr->getDestHi()));
+        Context.insert<InstFakeDef>(Instr->getDestHi());
       }
     }
   }
 
   void _mul(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstMIPS32Mul::create(Func, Dest, Src0, Src1));
+    Context.insert<InstMIPS32Mul>(Dest, Src0, Src1);
   }
 
   void _or(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstMIPS32Or::create(Func, Dest, Src0, Src1));
+    Context.insert<InstMIPS32Or>(Dest, Src0, Src1);
   }
 
   void _ori(Variable *Dest, Variable *Src, uint32_t Imm) {
-    Context.insert(InstMIPS32Ori::create(Func, Dest, Src, Imm));
+    Context.insert<InstMIPS32Ori>(Dest, Src, Imm);
   }
 
   void _sub(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstMIPS32Sub::create(Func, Dest, Src0, Src1));
+    Context.insert<InstMIPS32Sub>(Dest, Src0, Src1);
   }
 
   void _xor(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstMIPS32Xor::create(Func, Dest, Src0, Src1));
+    Context.insert<InstMIPS32Xor>(Dest, Src0, Src1);
   }
 
   void lowerArguments() override;
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 87c094b..a09629c 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -190,7 +190,7 @@
     // Generate a FakeUse of register arguments so that they do not get dead
     // code eliminated as a result of the FakeKill of scratch registers after
     // the call.
-    Context.insert(InstFakeUse::create(Func, Reg));
+    Context.insert<InstFakeUse>(Reg);
   }
   // Generate the call instruction. Assign its result to a temporary with high
   // register allocation weight.
@@ -244,15 +244,14 @@
       CallTarget = CallTargetVar;
     }
   }
-  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
-  Context.insert(NewCall);
+  auto *NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
   if (NeedSandboxing)
     _bundle_unlock();
   if (ReturnRegHi)
-    Context.insert(InstFakeDef::create(Func, ReturnRegHi));
+    Context.insert<InstFakeDef>(ReturnRegHi);
 
   // Insert a register-kill pseudo instruction.
-  Context.insert(InstFakeKill::create(Func, NewCall));
+  Context.insert<InstFakeKill>(NewCall);
 
   if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
     // Special treatment for an FP function which returns its result in st(0).
@@ -262,13 +261,12 @@
     _fstp(Dest);
     // Create a fake use of Dest in case it actually isn't used, because st(0)
     // still needs to be popped.
-    Context.insert(InstFakeUse::create(Func, Dest));
+    Context.insert<InstFakeUse>(Dest);
   }
 
   // Generate a FakeUse to keep the call live if necessary.
   if (Instr->hasSideEffects() && ReturnReg) {
-    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
-    Context.insert(FakeUse);
+    Context.insert<InstFakeUse>(ReturnReg);
   }
 
   if (!Dest)
@@ -324,7 +322,7 @@
     Arg->setIsArg(false);
 
     Args[I] = RegisterArg;
-    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
+    Context.insert<InstAssign>(Arg, RegisterArg);
   }
 }
 
@@ -339,7 +337,7 @@
       Variable *edx =
           legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
       Reg = eax;
-      Context.insert(InstFakeUse::create(Func, edx));
+      Context.insert<InstFakeUse>(edx);
     } else if (isScalarFloatingType(Src0->getType())) {
       _fld(Src0);
     } else if (isVectorType(Src0->getType())) {
@@ -469,7 +467,7 @@
     _push(ebp);
     _mov(ebp, esp);
     // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).
-    Context.insert(InstFakeUse::create(Func, ebp));
+    Context.insert<InstFakeUse>(ebp);
   }
 
   // Align the variables area. SpillAreaPaddingBytes is the size of the region
@@ -633,7 +631,7 @@
     // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
     // use of esp before the assignment of esp=ebp keeps previous esp
     // adjustments from being dead-code eliminated.
-    Context.insert(InstFakeUse::create(Func, esp));
+    Context.insert<InstFakeUse>(esp);
     _mov(esp, ebp);
     _pop(ebp);
   } else {
@@ -676,7 +674,7 @@
   lowerIndirectJump(T_ecx);
   if (RI->getSrcSize()) {
     auto *RetValue = llvm::cast<Variable>(RI->getSrc(0));
-    Context.insert(InstFakeUse::create(Func, RetValue));
+    Context.insert<InstFakeUse>(RetValue);
   }
   RI->setDeleted();
 }
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 087c36e..6844d4a 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -226,12 +226,12 @@
     // Generate a FakeUse of register arguments so that they do not get dead
     // code eliminated as a result of the FakeKill of scratch registers after
     // the call.
-    Context.insert(InstFakeUse::create(Func, Reg));
+    Context.insert<InstFakeUse>(Reg);
   }
 
   for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
     Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i));
-    Context.insert(InstFakeUse::create(Func, Reg));
+    Context.insert<InstFakeUse>(Reg);
   }
 
   // Generate the call instruction. Assign its result to a temporary with high
@@ -271,8 +271,7 @@
   if (NeedSandboxing) {
     llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
   }
-  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
-  Context.insert(NewCall);
+  auto *NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
   if (NeedSandboxing) {
     llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
   }
@@ -286,12 +285,11 @@
   }
 
   // Insert a register-kill pseudo instruction.
-  Context.insert(InstFakeKill::create(Func, NewCall));
+  Context.insert<InstFakeKill>(NewCall);
 
   // Generate a FakeUse to keep the call live if necessary.
   if (Instr->hasSideEffects() && ReturnReg) {
-    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
-    Context.insert(FakeUse);
+    Context.insert<InstFakeUse>(ReturnReg);
   }
 
   if (!Dest)
@@ -356,7 +354,7 @@
     Arg->setIsArg(false);
 
     Args[i] = RegisterArg;
-    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
+    Context.insert<InstAssign>(Arg, RegisterArg);
   }
 }
 
@@ -486,7 +484,7 @@
     _push(ebp);
     _mov(ebp, esp);
     // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).
-    Context.insert(InstFakeUse::create(Func, ebp));
+    Context.insert<InstFakeUse>(ebp);
   }
 
   // Align the variables area. SpillAreaPaddingBytes is the size of the region
@@ -645,7 +643,7 @@
     // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
     // use of esp before the assignment of esp=ebp keeps previous esp
     // adjustments from being dead-code eliminated.
-    Context.insert(InstFakeUse::create(Func, esp));
+    Context.insert<InstFakeUse>(esp);
     _mov(esp, ebp);
     _pop(ebp);
   } else {
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 488d68a..bf43072 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -250,7 +250,7 @@
   /// function. Otherwise some esp adjustments get dead-code eliminated.
   void keepEspLiveAtExit() {
     Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg());
-    Context.insert(InstFakeUse::create(Func, esp));
+    Context.insert<InstFakeUse>(esp);
   }
 
   /// Operand legalization helpers. To deal with address mode constraints, the
@@ -327,117 +327,115 @@
   /// minimal syntactic overhead, so that the lowering code can look as close to
   /// assembly as practical.
   void _adc(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Adc::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Adc>(Dest, Src0);
   }
   void _adc_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
-    Context.insert(Traits::Insts::AdcRMW::create(Func, DestSrc0, Src1));
+    Context.insert<typename Traits::Insts::AdcRMW>(DestSrc0, Src1);
   }
   void _add(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Add::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Add>(Dest, Src0);
   }
   void _add_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
-    Context.insert(Traits::Insts::AddRMW::create(Func, DestSrc0, Src1));
+    Context.insert<typename Traits::Insts::AddRMW>(DestSrc0, Src1);
   }
   void _addps(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Addps::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Addps>(Dest, Src0);
   }
   void _addss(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Addss::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Addss>(Dest, Src0);
   }
   void _and(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::And::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::And>(Dest, Src0);
   }
   void _andnps(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Andnps::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Andnps>(Dest, Src0);
   }
   void _andps(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Andps::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Andps>(Dest, Src0);
   }
   void _and_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
-    Context.insert(Traits::Insts::AndRMW::create(Func, DestSrc0, Src1));
+    Context.insert<typename Traits::Insts::AndRMW>(DestSrc0, Src1);
   }
   void _blendvps(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Blendvps::create(Func, Dest, Src0, Src1));
+    Context.insert<typename Traits::Insts::Blendvps>(Dest, Src0, Src1);
   }
   void _br(typename Traits::Cond::BrCond Condition, CfgNode *TargetTrue,
            CfgNode *TargetFalse) {
-    Context.insert(Traits::Insts::Br::create(
-        Func, TargetTrue, TargetFalse, Condition, Traits::Insts::Br::Far));
+    Context.insert<typename Traits::Insts::Br>(
+        TargetTrue, TargetFalse, Condition, Traits::Insts::Br::Far);
   }
   void _br(CfgNode *Target) {
-    Context.insert(
-        Traits::Insts::Br::create(Func, Target, Traits::Insts::Br::Far));
+    Context.insert<typename Traits::Insts::Br>(Target, Traits::Insts::Br::Far);
   }
   void _br(typename Traits::Cond::BrCond Condition, CfgNode *Target) {
-    Context.insert(Traits::Insts::Br::create(Func, Target, Condition,
-                                             Traits::Insts::Br::Far));
+    Context.insert<typename Traits::Insts::Br>(Target, Condition,
+                                               Traits::Insts::Br::Far);
   }
   void _br(typename Traits::Cond::BrCond Condition,
            typename Traits::Insts::Label *Label,
            typename Traits::Insts::Br::Mode Kind = Traits::Insts::Br::Near) {
-    Context.insert(Traits::Insts::Br::create(Func, Label, Condition, Kind));
+    Context.insert<typename Traits::Insts::Br>(Label, Condition, Kind);
   }
   void _bsf(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Bsf::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Bsf>(Dest, Src0);
   }
   void _bsr(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Bsr::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Bsr>(Dest, Src0);
   }
   void _bswap(Variable *SrcDest) {
-    Context.insert(Traits::Insts::Bswap::create(Func, SrcDest));
+    Context.insert<typename Traits::Insts::Bswap>(SrcDest);
   }
   void _cbwdq(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Cbwdq::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Cbwdq>(Dest, Src0);
   }
   void _cmov(Variable *Dest, Operand *Src0,
              typename Traits::Cond::BrCond Condition) {
-    Context.insert(Traits::Insts::Cmov::create(Func, Dest, Src0, Condition));
+    Context.insert<typename Traits::Insts::Cmov>(Dest, Src0, Condition);
   }
   void _cmp(Operand *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Icmp::create(Func, Src0, Src1));
+    Context.insert<typename Traits::Insts::Icmp>(Src0, Src1);
   }
   void _cmpps(Variable *Dest, Operand *Src0,
               typename Traits::Cond::CmppsCond Condition) {
-    Context.insert(Traits::Insts::Cmpps::create(Func, Dest, Src0, Condition));
+    Context.insert<typename Traits::Insts::Cmpps>(Dest, Src0, Condition);
   }
   void _cmpxchg(Operand *DestOrAddr, Variable *Eax, Variable *Desired,
                 bool Locked) {
-    Context.insert(
-        Traits::Insts::Cmpxchg::create(Func, DestOrAddr, Eax, Desired, Locked));
+    Context.insert<typename Traits::Insts::Cmpxchg>(DestOrAddr, Eax, Desired,
+                                                    Locked);
     // Mark eax as possibly modified by cmpxchg.
-    Context.insert(
-        InstFakeDef::create(Func, Eax, llvm::dyn_cast<Variable>(DestOrAddr)));
+    Context.insert<InstFakeDef>(Eax, llvm::dyn_cast<Variable>(DestOrAddr));
     _set_dest_redefined();
-    Context.insert(InstFakeUse::create(Func, Eax));
+    Context.insert<InstFakeUse>(Eax);
   }
   void _cmpxchg8b(typename Traits::X86OperandMem *Addr, Variable *Edx,
                   Variable *Eax, Variable *Ecx, Variable *Ebx, bool Locked) {
-    Context.insert(Traits::Insts::Cmpxchg8b::create(Func, Addr, Edx, Eax, Ecx,
-                                                    Ebx, Locked));
+    Context.insert<typename Traits::Insts::Cmpxchg8b>(Addr, Edx, Eax, Ecx, Ebx,
+                                                      Locked);
     // Mark edx, and eax as possibly modified by cmpxchg8b.
-    Context.insert(InstFakeDef::create(Func, Edx));
+    Context.insert<InstFakeDef>(Edx);
     _set_dest_redefined();
-    Context.insert(InstFakeUse::create(Func, Edx));
-    Context.insert(InstFakeDef::create(Func, Eax));
+    Context.insert<InstFakeUse>(Edx);
+    Context.insert<InstFakeDef>(Eax);
     _set_dest_redefined();
-    Context.insert(InstFakeUse::create(Func, Eax));
+    Context.insert<InstFakeUse>(Eax);
   }
   void _cvt(Variable *Dest, Operand *Src0,
             typename Traits::Insts::Cvt::CvtVariant Variant) {
-    Context.insert(Traits::Insts::Cvt::create(Func, Dest, Src0, Variant));
+    Context.insert<typename Traits::Insts::Cvt>(Dest, Src0, Variant);
   }
   void _div(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Div::create(Func, Dest, Src0, Src1));
+    Context.insert<typename Traits::Insts::Div>(Dest, Src0, Src1);
   }
   void _divps(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Divps::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Divps>(Dest, Src0);
   }
   void _divss(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Divss::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Divss>(Dest, Src0);
   }
   template <typename T = Traits>
   typename std::enable_if<T::UsesX87, void>::type _fld(Operand *Src0) {
-    Context.insert(Traits::Insts::template Fld<>::create(Func, Src0));
+    Context.insert<typename Traits::Insts::template Fld<>>(Src0);
   }
   // TODO(jpp): when implementing the X8664 calling convention, make sure x8664
   // does not invoke this method, and remove it.
@@ -447,7 +445,7 @@
   }
   template <typename T = Traits>
   typename std::enable_if<T::UsesX87, void>::type _fstp(Variable *Dest) {
-    Context.insert(Traits::Insts::template Fstp<>::create(Func, Dest));
+    Context.insert<typename Traits::Insts::template Fstp<>>(Dest);
   }
   // TODO(jpp): when implementing the X8664 calling convention, make sure x8664
   // does not invoke this method, and remove it.
@@ -456,24 +454,24 @@
     llvm::report_fatal_error("fstp is not available in x86-64");
   }
   void _idiv(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Idiv::create(Func, Dest, Src0, Src1));
+    Context.insert<typename Traits::Insts::Idiv>(Dest, Src0, Src1);
   }
   void _imul(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Imul::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Imul>(Dest, Src0);
   }
   void _imul_imm(Variable *Dest, Operand *Src0, Constant *Imm) {
-    Context.insert(Traits::Insts::ImulImm::create(Func, Dest, Src0, Imm));
+    Context.insert<typename Traits::Insts::ImulImm>(Dest, Src0, Imm);
   }
   void _insertps(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Insertps::create(Func, Dest, Src0, Src1));
+    Context.insert<typename Traits::Insts::Insertps>(Dest, Src0, Src1);
   }
   void _jmp(Operand *Target) {
-    Context.insert(Traits::Insts::Jmp::create(Func, Target));
+    Context.insert<typename Traits::Insts::Jmp>(Target);
   }
   void _lea(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Lea::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Lea>(Dest, Src0);
   }
-  void _mfence() { Context.insert(Traits::Insts::Mfence::create(Func)); }
+  void _mfence() { Context.insert<typename Traits::Insts::Mfence>(); }
   /// Moves can be used to redefine registers, creating "partial kills" for
   /// liveness.  Mark where moves are used in this way.
   void _redefined(Inst *MovInst, bool IsRedefinition = true) {
@@ -483,220 +481,214 @@
   /// If Dest=nullptr is passed in, then a new variable is created, marked as
   /// infinite register allocation weight, and returned through the in/out Dest
   /// argument.
-  Inst *_mov(Variable *&Dest, Operand *Src0,
-             int32_t RegNum = Variable::NoRegister) {
+  typename Traits::Insts::Mov *_mov(Variable *&Dest, Operand *Src0,
+                                    int32_t RegNum = Variable::NoRegister) {
     if (Dest == nullptr)
       Dest = makeReg(Src0->getType(), RegNum);
-    Inst *NewInst = Traits::Insts::Mov::create(Func, Dest, Src0);
-    Context.insert(NewInst);
-    return NewInst;
+    return Context.insert<typename Traits::Insts::Mov>(Dest, Src0);
   }
-  Inst *_movp(Variable *Dest, Operand *Src0) {
-    Inst *NewInst = Traits::Insts::Movp::create(Func, Dest, Src0);
-    Context.insert(NewInst);
-    return NewInst;
+  typename Traits::Insts::Movp *_movp(Variable *Dest, Operand *Src0) {
+    return Context.insert<typename Traits::Insts::Movp>(Dest, Src0);
   }
   void _movd(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Movd::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Movd>(Dest, Src0);
   }
   void _movq(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Movq::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Movq>(Dest, Src0);
   }
   void _movss(Variable *Dest, Variable *Src0) {
-    Context.insert(Traits::Insts::MovssRegs::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::MovssRegs>(Dest, Src0);
   }
   void _movsx(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Movsx::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Movsx>(Dest, Src0);
   }
   void _movzx(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Movzx::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Movzx>(Dest, Src0);
   }
   void _maxss(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Maxss::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Maxss>(Dest, Src0);
   }
   void _minss(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Minss::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Minss>(Dest, Src0);
   }
   void _mul(Variable *Dest, Variable *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Mul::create(Func, Dest, Src0, Src1));
+    Context.insert<typename Traits::Insts::Mul>(Dest, Src0, Src1);
   }
   void _mulps(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Mulps::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Mulps>(Dest, Src0);
   }
   void _mulss(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Mulss::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Mulss>(Dest, Src0);
   }
   void _neg(Variable *SrcDest) {
-    Context.insert(Traits::Insts::Neg::create(Func, SrcDest));
+    Context.insert<typename Traits::Insts::Neg>(SrcDest);
   }
   void _nop(SizeT Variant) {
-    Context.insert(Traits::Insts::Nop::create(Func, Variant));
+    Context.insert<typename Traits::Insts::Nop>(Variant);
   }
   void _or(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Or::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Or>(Dest, Src0);
   }
   void _orps(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Orps::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Orps>(Dest, Src0);
   }
   void _or_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
-    Context.insert(Traits::Insts::OrRMW::create(Func, DestSrc0, Src1));
+    Context.insert<typename Traits::Insts::OrRMW>(DestSrc0, Src1);
   }
   void _padd(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Padd::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Padd>(Dest, Src0);
   }
   void _pand(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Pand::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Pand>(Dest, Src0);
   }
   void _pandn(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Pandn::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Pandn>(Dest, Src0);
   }
   void _pblendvb(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Pblendvb::create(Func, Dest, Src0, Src1));
+    Context.insert<typename Traits::Insts::Pblendvb>(Dest, Src0, Src1);
   }
   void _pcmpeq(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Pcmpeq::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Pcmpeq>(Dest, Src0);
   }
   void _pcmpgt(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Pcmpgt::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Pcmpgt>(Dest, Src0);
   }
   void _pextr(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Pextr::create(Func, Dest, Src0, Src1));
+    Context.insert<typename Traits::Insts::Pextr>(Dest, Src0, Src1);
   }
   void _pinsr(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Pinsr::create(Func, Dest, Src0, Src1));
+    Context.insert<typename Traits::Insts::Pinsr>(Dest, Src0, Src1);
   }
   void _pmull(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Pmull::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Pmull>(Dest, Src0);
   }
   void _pmuludq(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Pmuludq::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Pmuludq>(Dest, Src0);
   }
   void _pop(Variable *Dest) {
-    Context.insert(Traits::Insts::Pop::create(Func, Dest));
+    Context.insert<typename Traits::Insts::Pop>(Dest);
   }
   void _por(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Por::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Por>(Dest, Src0);
   }
   void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Pshufd::create(Func, Dest, Src0, Src1));
+    Context.insert<typename Traits::Insts::Pshufd>(Dest, Src0, Src1);
   }
   void _psll(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Psll::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Psll>(Dest, Src0);
   }
   void _psra(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Psra::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Psra>(Dest, Src0);
   }
   void _psrl(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Psrl::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Psrl>(Dest, Src0);
   }
   void _psub(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Psub::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Psub>(Dest, Src0);
   }
   void _push(Variable *Src0) {
-    Context.insert(Traits::Insts::Push::create(Func, Src0));
+    Context.insert<typename Traits::Insts::Push>(Src0);
   }
   void _pxor(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Pxor::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Pxor>(Dest, Src0);
   }
   void _ret(Variable *Src0 = nullptr) {
-    Context.insert(Traits::Insts::Ret::create(Func, Src0));
+    Context.insert<typename Traits::Insts::Ret>(Src0);
   }
   void _rol(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Rol::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Rol>(Dest, Src0);
   }
   void _sar(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Sar::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Sar>(Dest, Src0);
   }
   void _sbb(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Sbb::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Sbb>(Dest, Src0);
   }
   void _sbb_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
-    Context.insert(Traits::Insts::SbbRMW::create(Func, DestSrc0, Src1));
+    Context.insert<typename Traits::Insts::SbbRMW>(DestSrc0, Src1);
   }
   void _setcc(Variable *Dest, typename Traits::Cond::BrCond Condition) {
-    Context.insert(Traits::Insts::Setcc::create(Func, Dest, Condition));
+    Context.insert<typename Traits::Insts::Setcc>(Dest, Condition);
   }
   void _shl(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Shl::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Shl>(Dest, Src0);
   }
   void _shld(Variable *Dest, Variable *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Shld::create(Func, Dest, Src0, Src1));
+    Context.insert<typename Traits::Insts::Shld>(Dest, Src0, Src1);
   }
   void _shr(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Shr::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Shr>(Dest, Src0);
   }
   void _shrd(Variable *Dest, Variable *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Shrd::create(Func, Dest, Src0, Src1));
+    Context.insert<typename Traits::Insts::Shrd>(Dest, Src0, Src1);
   }
   void _shufps(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Shufps::create(Func, Dest, Src0, Src1));
+    Context.insert<typename Traits::Insts::Shufps>(Dest, Src0, Src1);
   }
   void _sqrtss(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Sqrtss::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Sqrtss>(Dest, Src0);
   }
   void _store(Operand *Value, typename Traits::X86Operand *Mem) {
-    Context.insert(Traits::Insts::Store::create(Func, Value, Mem));
+    Context.insert<typename Traits::Insts::Store>(Value, Mem);
   }
   void _storep(Variable *Value, typename Traits::X86OperandMem *Mem) {
-    Context.insert(Traits::Insts::StoreP::create(Func, Value, Mem));
+    Context.insert<typename Traits::Insts::StoreP>(Value, Mem);
   }
   void _storeq(Variable *Value, typename Traits::X86OperandMem *Mem) {
-    Context.insert(Traits::Insts::StoreQ::create(Func, Value, Mem));
+    Context.insert<typename Traits::Insts::StoreQ>(Value, Mem);
   }
   void _sub(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Sub::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Sub>(Dest, Src0);
   }
   void _sub_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
-    Context.insert(Traits::Insts::SubRMW::create(Func, DestSrc0, Src1));
+    Context.insert<typename Traits::Insts::SubRMW>(DestSrc0, Src1);
   }
   void _subps(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Subps::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Subps>(Dest, Src0);
   }
   void _subss(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Subss::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Subss>(Dest, Src0);
   }
   void _test(Operand *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Test::create(Func, Src0, Src1));
+    Context.insert<typename Traits::Insts::Test>(Src0, Src1);
   }
   void _ucomiss(Operand *Src0, Operand *Src1) {
-    Context.insert(Traits::Insts::Ucomiss::create(Func, Src0, Src1));
+    Context.insert<typename Traits::Insts::Ucomiss>(Src0, Src1);
   }
-  void _ud2() { Context.insert(Traits::Insts::UD2::create(Func)); }
+  void _ud2() { Context.insert<typename Traits::Insts::UD2>(); }
   void _xadd(Operand *Dest, Variable *Src, bool Locked) {
-    Context.insert(Traits::Insts::Xadd::create(Func, Dest, Src, Locked));
+    Context.insert<typename Traits::Insts::Xadd>(Dest, Src, Locked);
     // The xadd exchanges Dest and Src (modifying Src). Model that update with
     // a FakeDef followed by a FakeUse.
-    Context.insert(
-        InstFakeDef::create(Func, Src, llvm::dyn_cast<Variable>(Dest)));
+    Context.insert<InstFakeDef>(Src, llvm::dyn_cast<Variable>(Dest));
     _set_dest_redefined();
-    Context.insert(InstFakeUse::create(Func, Src));
+    Context.insert<InstFakeUse>(Src);
   }
   void _xchg(Operand *Dest, Variable *Src) {
-    Context.insert(Traits::Insts::Xchg::create(Func, Dest, Src));
+    Context.insert<typename Traits::Insts::Xchg>(Dest, Src);
     // The xchg modifies Dest and Src -- model that update with a
     // FakeDef/FakeUse.
-    Context.insert(
-        InstFakeDef::create(Func, Src, llvm::dyn_cast<Variable>(Dest)));
+    Context.insert<InstFakeDef>(Src, llvm::dyn_cast<Variable>(Dest));
     _set_dest_redefined();
-    Context.insert(InstFakeUse::create(Func, Src));
+    Context.insert<InstFakeUse>(Src);
   }
   void _xor(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Xor::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Xor>(Dest, Src0);
   }
   void _xorps(Variable *Dest, Operand *Src0) {
-    Context.insert(Traits::Insts::Xorps::create(Func, Dest, Src0));
+    Context.insert<typename Traits::Insts::Xorps>(Dest, Src0);
   }
   void _xor_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
-    Context.insert(Traits::Insts::XorRMW::create(Func, DestSrc0, Src1));
+    Context.insert<typename Traits::Insts::XorRMW>(DestSrc0, Src1);
   }
 
   void _iaca_start() {
     if (!BuildDefs::minimal())
-      Context.insert(Traits::Insts::IacaStart::create(Func));
+      Context.insert<typename Traits::Insts::IacaStart>();
   }
   void _iaca_end() {
     if (!BuildDefs::minimal())
-      Context.insert(Traits::Insts::IacaEnd::create(Func));
+      Context.insert<typename Traits::Insts::IacaEnd>();
   }
 
   /// This class helps wrap IACA markers around the code generated by the
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 01a5c44..d251e6a 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -1016,7 +1016,7 @@
       // value to Dest, as Dest is rematerializable.
       assert(Dest->isRematerializable());
       FixedAllocaSizeBytes += Value;
-      Context.insert(InstFakeDef::create(Func, Dest));
+      Context.insert<InstFakeDef>(Dest);
     } else {
       _sub(esp, Ctx->getConstantInt32(Value));
     }
@@ -1358,7 +1358,7 @@
 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
   Variable *Dest = Inst->getDest();
   if (Dest->isRematerializable()) {
-    Context.insert(InstFakeDef::create(Func, Dest));
+    Context.insert<InstFakeDef>(Dest);
     return;
   }
   Type Ty = Dest->getType();
@@ -1476,7 +1476,7 @@
       _mul(T_4Lo, T_3, Src1Lo);
       // The mul instruction produces two dest variables, edx:eax. We create a
       // fake definition of edx to account for this.
-      Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
+      Context.insert<InstFakeDef>(T_4Hi, T_4Lo);
       _mov(DestLo, T_4Lo);
       _add(T_4Hi, T_1);
       _add(T_4Hi, T_2);
@@ -1911,7 +1911,7 @@
 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
   Variable *Dest = Inst->getDest();
   if (Dest->isRematerializable()) {
-    Context.insert(InstFakeDef::create(Func, Dest));
+    Context.insert<InstFakeDef>(Dest);
     return;
   }
   Operand *Src = Inst->getSrc(0);
@@ -2377,7 +2377,7 @@
         // Technically, the Spill is defined after the _store happens, but
         // SpillLo is considered a "use" of Spill so define Spill before it is
         // used.
-        Context.insert(InstFakeDef::create(Func, Spill));
+        Context.insert<InstFakeDef>(Spill);
         _store(T_Lo, SpillLo);
         _mov(T_Hi, hiOperand(Src0));
         _store(T_Hi, SpillHi);
@@ -2450,7 +2450,7 @@
       // used here.
       // _movss is a binary instruction, so the FakeDef is needed to keep the
       // live range analysis consistent.
-      Context.insert(InstFakeDef::create(Func, ExtractedElementR));
+      Context.insert<InstFakeDef>(ExtractedElementR);
       _movss(ExtractedElementR, T);
     }
   } else {
@@ -2886,7 +2886,7 @@
       // sometimes avoid a move before the OR.
       _mov(Temp, Src0HiRM);
       _or(Temp, Src0LoRM);
-      Context.insert(InstFakeUse::create(Func, Temp));
+      Context.insert<InstFakeUse>(Temp);
       setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
       return;
     case InstIcmp::Ne:
@@ -2895,7 +2895,7 @@
       // sometimes avoid a move before the OR.
       _mov(Temp, Src0HiRM);
       _or(Temp, Src0LoRM);
-      Context.insert(InstFakeUse::create(Func, Temp));
+      Context.insert<InstFakeUse>(Temp);
       setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
       return;
     case InstIcmp::Uge:
@@ -3060,8 +3060,8 @@
     llvm::report_fatal_error("Expected a consumer instruction");
   }
   if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
-    Context.insert(InstFakeUse::create(Func, T));
-    Context.insert(InstFakeDef::create(Func, Dest));
+    Context.insert<InstFakeUse>(T);
+    Context.insert<InstFakeDef>(Dest);
     _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
     return;
   }
@@ -3290,8 +3290,8 @@
         auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
         lowerCast(Cast);
         // Make sure that the atomic load isn't elided when unused.
-        Context.insert(InstFakeUse::create(Func, Dest64On32->getLo()));
-        Context.insert(InstFakeUse::create(Func, Dest64On32->getHi()));
+        Context.insert<InstFakeUse>(Dest64On32->getLo());
+        Context.insert<InstFakeUse>(Dest64On32->getHi());
         return;
       }
     }
@@ -3300,8 +3300,7 @@
     // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
     // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
     // the FakeUse on the last-inserted instruction's dest.
-    Context.insert(
-        InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
+    Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
     return;
   }
   case Intrinsics::AtomicRMW:
@@ -3840,17 +3839,17 @@
       if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) {
         auto *ValLo = llvm::cast<Variable>(loOperand(ValVar));
         auto *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
-        Context.insert(InstFakeUse::create(Func, ValLo));
-        Context.insert(InstFakeUse::create(Func, ValHi));
+        Context.insert<InstFakeUse>(ValLo);
+        Context.insert<InstFakeUse>(ValHi);
       }
     } else {
       // For xchg, the loop is slightly smaller and ebx/ecx are used.
-      Context.insert(InstFakeUse::create(Func, T_ebx));
-      Context.insert(InstFakeUse::create(Func, T_ecx));
+      Context.insert<InstFakeUse>(T_ebx);
+      Context.insert<InstFakeUse>(T_ecx);
     }
     // The address base (if any) is also reused in the loop.
     if (Variable *Base = Addr->getBase())
-      Context.insert(InstFakeUse::create(Func, Base));
+      Context.insert<InstFakeUse>(Base);
     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
     _mov(DestLo, T_eax);
@@ -3875,9 +3874,7 @@
   }
   Variable *T_eax = makeReg(Ty, Eax);
   _mov(T_eax, Addr);
-  typename Traits::Insts::Label *Label =
-      Traits::Insts::Label::create(Func, this);
-  Context.insert(Label);
+  auto *Label = Context.insert<typename Traits::Insts::Label>(this);
   // We want to pick a different register for T than Eax, so don't use
   // _mov(T == nullptr, T_eax).
   Variable *T = makeReg(Ty);
@@ -3889,11 +3886,11 @@
   // If Val is a variable, model the extended live range of Val through
   // the end of the loop, since it will be re-used by the loop.
   if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) {
-    Context.insert(InstFakeUse::create(Func, ValVar));
+    Context.insert<InstFakeUse>(ValVar);
   }
   // The address base (if any) is also reused in the loop.
   if (Variable *Base = Addr->getBase())
-    Context.insert(InstFakeUse::create(Func, Base));
+    Context.insert<InstFakeUse>(Base);
   _mov(Dest, T_eax);
 }
 
@@ -4660,8 +4657,7 @@
   if (Var->getRegNum() == Traits::RegisterSet::Reg_esp)
     return;
 
-  typename Traits::Insts::Label *Label =
-      Traits::Insts::Label::create(Func, this);
+  auto *Label = Traits::Insts::Label::create(Func, this);
   _cmp(Opnd, Ctx->getConstantZero(IceType_i32));
   _br(Traits::Cond::Br_e, Label);
   _cmp(Opnd, Ctx->getConstantInt32(1));
@@ -4711,7 +4707,7 @@
     }
     Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp,
                                          Index, Shift, SegmentReg);
-    Context.insert(InstLoad::create(Func, Dest, Addr));
+    Context.insert<InstLoad>(Dest, Addr);
   }
 }
 
@@ -4775,8 +4771,7 @@
     // The cmov instruction doesn't allow 8-bit or FP operands, so we need
     // explicit control flow.
     // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
-    typename Traits::Insts::Label *Label =
-        Traits::Insts::Label::create(Func, this);
+    auto *Label = Traits::Insts::Label::create(Func, this);
     SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
     _mov(Dest, SrcT);
     _br(Cond, Label);
@@ -5018,10 +5013,9 @@
     }
     Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp,
                                          Index, Shift, SegmentReg);
-    auto *NewStore = InstStore::create(Func, Data, Addr);
+    auto *NewStore = Context.insert<InstStore>(Data, Addr);
     if (Inst->getDest())
       NewStore->setRmwBeacon(Inst->getRmwBeacon());
-    Context.insert(NewStore);
   }
 }
 
@@ -5273,24 +5267,23 @@
 
     // Extract the next two inputs.
     Variable *Op0 = Func->makeVariable(ElementTy);
-    Context.insert(InstExtractElement::create(Func, Op0, Src0, Index));
+    Context.insert<InstExtractElement>(Op0, Src0, Index);
     Variable *Op1 = Func->makeVariable(ElementTy);
-    Context.insert(InstExtractElement::create(Func, Op1, Src1, Index));
+    Context.insert<InstExtractElement>(Op1, Src1, Index);
 
     // Perform the arithmetic as a scalar operation.
     Variable *Res = Func->makeVariable(ElementTy);
-    auto *Arith = InstArithmetic::create(Func, Kind, Res, Op0, Op1);
-    Context.insert(Arith);
+    auto *Arith = Context.insert<InstArithmetic>(Kind, Res, Op0, Op1);
     // We might have created an operation that needed a helper call.
     genTargetHelperCallFor(Arith);
 
     // Insert the result into position.
     Variable *DestT = Func->makeVariable(Ty);
-    Context.insert(InstInsertElement::create(Func, DestT, T, Res, Index));
+    Context.insert<InstInsertElement>(DestT, T, Res, Index);
     T = DestT;
   }
 
-  Context.insert(InstAssign::create(Func, Dest, T));
+  Context.insert<InstAssign>(Dest, T);
 }
 
 /// The following pattern occurs often in lowered C and C++ code:
@@ -5581,7 +5574,7 @@
         HelperName = H_bitcast_i8_8xi1;
         Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
         // Arguments to functions are required to be at least 32 bits wide.
-        Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
+        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
         Src0 = Src0AsI32;
       } break;
       case IceType_v16i1: {
@@ -5589,7 +5582,7 @@
         HelperName = H_bitcast_i16_16xi1;
         Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
         // Arguments to functions are required to be at least 32 bits wide.
-        Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
+        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
         Src0 = Src0AsI32;
       } break;
       }
@@ -5724,13 +5717,13 @@
     break;
   case IceType_f32:
   case IceType_f64:
-    Context.insert(InstFakeDef::create(Func, Reg));
+    Context.insert<InstFakeDef>(Reg);
     _xorps(Reg, Reg);
     break;
   default:
     // All vector types use the same pxor instruction.
     assert(isVectorType(Ty));
-    Context.insert(InstFakeDef::create(Func, Reg));
+    Context.insert<InstFakeDef>(Reg);
     _pxor(Reg, Reg);
     break;
   }
@@ -5754,7 +5747,7 @@
                                                         int32_t RegNum) {
   Variable *MinusOnes = makeReg(Ty, RegNum);
   // Insert a FakeDef so the live range of MinusOnes is not overestimated.
-  Context.insert(InstFakeDef::create(Func, MinusOnes));
+  Context.insert<InstFakeDef>(MinusOnes);
   _pcmpeq(MinusOnes, MinusOnes);
   return MinusOnes;
 }
@@ -6064,7 +6057,7 @@
     //
     // If in the future the implementation is changed to lower undef values to
     // uninitialized registers, a FakeDef will be needed:
-    //     Context.insert(InstFakeDef::create(Func, Reg));
+    //     Context.insert<InstFakeDef>(Reg);
     // This is in order to ensure that the live range of Reg is not
     // overestimated.  If the constant being lowered is a 64 bit value, then
     // the result should be split and the lo and hi components will need to go