Iasm and obj lowering for advanced switch lowering.

Jump table emission is delayed until offsets are known. X86 local jumps can be
near or far. Sandboxing is applied to indirect jumps from the jump table.

BUG=
R=stichnot@chromium.org, jvoung

Review URL: https://codereview.chromium.org/1257283004.
diff --git a/src/IceAssembler.h b/src/IceAssembler.h
index 437dfc3..d53a3be 100644
--- a/src/IceAssembler.h
+++ b/src/IceAssembler.h
@@ -26,9 +26,68 @@
 
 #include "IceDefs.h"
 #include "IceFixups.h"
+#include "IceUtils.h"
 
 namespace Ice {
 
+/// A Label can be in one of three states:
+///  - Unused.
+///  - Linked, unplaced and tracking the position of branches to the label.
+///  - Bound, placed and tracking its position.
+class Label {
+  Label(const Label &) = delete;
+  Label &operator=(const Label &) = delete;
+
+public:
+  Label() = default;
+  ~Label() = default;
+
+  virtual void finalCheck() const {
+    // Assert if label is being destroyed with unresolved branches pending.
+    assert(!isLinked());
+  }
+
+  /// Returns the position for bound labels (branches that come after this are
+  /// considered backward branches). Cannot be used for unused or linked labels.
+  intptr_t getPosition() const {
+    assert(isBound());
+    return -Position - kWordSize;
+  }
+
+  /// Returns the position of an earlier branch instruction that was linked to
+  /// this label (branches that use this are considered forward branches).  The
+  /// linked instructions form a linked list, of sorts, using the instruction's
+  /// displacement field for the location of the next instruction that is also
+  /// linked to this label.
+  intptr_t getLinkPosition() const {
+    assert(isLinked());
+    return Position - kWordSize;
+  }
+
+  bool isBound() const { return Position < 0; }
+  bool isLinked() const { return Position > 0; }
+  virtual bool isUnused() const { return Position == 0; }
+
+protected:
+  void bindTo(intptr_t position) {
+    assert(!isBound());
+    Position = -position - kWordSize;
+    assert(isBound());
+  }
+
+  void linkTo(intptr_t position) {
+    assert(!isBound());
+    Position = position + kWordSize;
+    assert(isLinked());
+  }
+
+  intptr_t Position = 0;
+
+private:
+  // TODO(jvoung): why are labels offset by this?
+  static constexpr uint32_t kWordSize = sizeof(uint32_t);
+};
+
 /// Assembler buffers are used to emit binary code. They grow on demand.
 class AssemblerBuffer {
   AssemblerBuffer(const AssemblerBuffer &) = delete;
@@ -175,15 +234,22 @@
 
   /// Align the tail end of the function to the required target alignment.
   virtual void alignFunction() = 0;
+  /// Align the tail end of the basic block to the required target alignment.
+  void alignCfgNode() {
+    const SizeT Align = 1 << getBundleAlignLog2Bytes();
+    padWithNop(Utils::OffsetToAlignment(Buffer.getPosition(), Align));
+  }
 
   /// Add nop padding of a particular width to the current bundle.
   virtual void padWithNop(intptr_t Padding) = 0;
 
   virtual SizeT getBundleAlignLog2Bytes() const = 0;
 
-  virtual const char *getNonExecPadDirective() const = 0;
+  virtual const char *getAlignDirective() const = 0;
   virtual llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const = 0;
 
+  /// Get the label for a CfgNode.
+  virtual Label *getOrCreateCfgNodeLabel(SizeT NodeNumber) = 0;
   /// Mark the current text location as the start of a CFG node
   /// (represented by NodeNumber).
   virtual void bindCfgNodeLabel(SizeT NodeNumber) = 0;
diff --git a/src/IceAssemblerARM32.h b/src/IceAssemblerARM32.h
index 977b43d..84f2da8 100644
--- a/src/IceAssemblerARM32.h
+++ b/src/IceAssemblerARM32.h
@@ -47,7 +47,7 @@
 
   SizeT getBundleAlignLog2Bytes() const override { return 4; }
 
-  const char *getNonExecPadDirective() const override { return ".p2alignl"; }
+  const char *getAlignDirective() const override { return ".p2alignl"; }
 
   llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const override {
     // Use a particular UDF encoding -- TRAPNaCl in LLVM: 0xE7FEDEF0
@@ -61,6 +61,11 @@
     llvm_unreachable("Not yet implemented.");
   }
 
+  Label *getOrCreateCfgNodeLabel(SizeT NodeNumber) override {
+    (void)NodeNumber;
+    llvm_unreachable("Not yet implemented.");
+  }
+
   void bindCfgNodeLabel(SizeT NodeNumber) override {
     (void)NodeNumber;
     llvm_unreachable("Not yet implemented.");
diff --git a/src/IceAssemblerMIPS32.h b/src/IceAssemblerMIPS32.h
index 06b1487..1503bf2 100644
--- a/src/IceAssemblerMIPS32.h
+++ b/src/IceAssemblerMIPS32.h
@@ -49,7 +49,7 @@
 
   SizeT getBundleAlignLog2Bytes() const override { return 4; }
 
-  const char *getNonExecPadDirective() const override { return ".p2alignl"; }
+  const char *getAlignDirective() const override { return ".p2alignl"; }
 
   llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const override {
     // TODO(reed kotler) . Find out what this should be.
@@ -62,6 +62,11 @@
     llvm::report_fatal_error("Not yet implemented.");
   }
 
+  Label *getOrCreateCfgNodeLabel(SizeT NodeNumber) override {
+    (void)NodeNumber;
+    llvm_unreachable("Not yet implemented.");
+  }
+
   void bindCfgNodeLabel(SizeT NodeNumber) override {
     (void)NodeNumber;
     llvm::report_fatal_error("Not yet implemented.");
diff --git a/src/IceAssemblerX8632.h b/src/IceAssemblerX8632.h
index fb9eedf..13f5606 100644
--- a/src/IceAssemblerX8632.h
+++ b/src/IceAssemblerX8632.h
@@ -38,7 +38,6 @@
 namespace X8632 {
 
 using Immediate = ::Ice::X86Internal::Immediate;
-using Label = ::Ice::X86Internal::Label;
 
 class AssemblerX8632 : public X86Internal::AssemblerX86Base<TargetX8632> {
   AssemblerX8632(const AssemblerX8632 &) = delete;
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 4d0333a..252903e 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -71,88 +71,41 @@
   AssemblerFixup *fixup_ = nullptr;
 };
 
-class Label {
+/// X86 allows near and far jumps.
+class Label final : public Ice::Label {
   Label(const Label &) = delete;
   Label &operator=(const Label &) = delete;
 
 public:
-  Label() {
-    if (BuildDefs::asserts()) {
-      for (int i = 0; i < kMaxUnresolvedBranches; i++) {
-        unresolved_near_positions_[i] = -1;
-      }
-    }
-  }
-
+  Label() = default;
   ~Label() = default;
 
-  void FinalCheck() const {
-    // Assert if label is being destroyed with unresolved branches pending.
-    assert(!IsLinked());
-    assert(!HasNear());
+  void finalCheck() const override {
+    Ice::Label::finalCheck();
+    assert(!hasNear());
   }
 
-  // TODO(jvoung): why are labels offset by this?
-  static const uint32_t kWordSize = sizeof(uint32_t);
-
-  // Returns the position for bound labels (branches that come after this
-  // are considered backward branches). Cannot be used for unused or linked
-  // labels.
-  intptr_t Position() const {
-    assert(IsBound());
-    return -position_ - kWordSize;
+  /// Returns the position of an earlier branch instruction which assumes that
+  /// this label is "near", and bumps iterator to the next near position.
+  intptr_t getNearPosition() {
+    assert(hasNear());
+    intptr_t Pos = UnresolvedNearPositions.back();
+    UnresolvedNearPositions.pop_back();
+    return Pos;
   }
 
-  // Returns the position of an earlier branch instruction that was linked
-  // to this label (branches that use this are considered forward branches).
-  // The linked instructions form a linked list, of sorts, using the
-  // instruction's displacement field for the location of the next
-  // instruction that is also linked to this label.
-  intptr_t LinkPosition() const {
-    assert(IsLinked());
-    return position_ - kWordSize;
+  bool hasNear() const { return !UnresolvedNearPositions.empty(); }
+  bool isUnused() const override {
+    return Ice::Label::isUnused() && !hasNear();
   }
 
-  // Returns the position of an earlier branch instruction which
-  // assumes that this label is "near", and bumps iterator to the
-  // next near position.
-  intptr_t NearPosition() {
-    assert(HasNear());
-    return unresolved_near_positions_[--num_unresolved_];
-  }
-
-  bool IsBound() const { return position_ < 0; }
-  bool IsLinked() const { return position_ > 0; }
-  bool IsUnused() const { return (position_ == 0) && (num_unresolved_ == 0); }
-  bool HasNear() const { return num_unresolved_ != 0; }
-
 private:
-  void BindTo(intptr_t position) {
-    assert(!IsBound());
-    assert(!HasNear());
-    position_ = -position - kWordSize;
-    assert(IsBound());
+  void nearLinkTo(intptr_t position) {
+    assert(!isBound());
+    UnresolvedNearPositions.push_back(position);
   }
 
-  void LinkTo(intptr_t position) {
-    assert(!IsBound());
-    position_ = position + kWordSize;
-    assert(IsLinked());
-  }
-
-  void NearLinkTo(intptr_t position) {
-    assert(!IsBound());
-    assert(num_unresolved_ < kMaxUnresolvedBranches);
-    unresolved_near_positions_[num_unresolved_++] = position;
-  }
-
-  static constexpr int kMaxUnresolvedBranches = 20;
-
-  intptr_t position_ = 0;
-  intptr_t num_unresolved_ = 0;
-  // TODO(stichnot,jvoung): Can this instead be
-  // llvm::SmallVector<intptr_t, kMaxUnresolvedBranches> ?
-  intptr_t unresolved_near_positions_[kMaxUnresolvedBranches];
+  llvm::SmallVector<intptr_t, 20> UnresolvedNearPositions;
 
   template <class> friend class AssemblerX86Base;
 };
@@ -181,7 +134,7 @@
 
   SizeT getBundleAlignLog2Bytes() const override { return 5; }
 
-  const char *getNonExecPadDirective() const override { return ".p2align"; }
+  const char *getAlignDirective() const override { return ".p2align"; }
 
   llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const override {
     static const uint8_t Padding[] = {0xF4};
@@ -197,10 +150,10 @@
       nop(Padding);
   }
 
-  Label *GetOrCreateCfgNodeLabel(SizeT NodeNumber);
+  Label *getOrCreateCfgNodeLabel(SizeT NodeNumber) override;
   void bindCfgNodeLabel(SizeT NodeNumber) override;
-  Label *GetOrCreateLocalLabel(SizeT Number);
-  void BindLocalLabel(SizeT Number);
+  Label *getOrCreateLocalLabel(SizeT Number);
+  void bindLocalLabel(SizeT Number);
 
   bool fixupIsPCRel(FixupKind Kind) const override {
     // Currently assuming this is the only PC-rel relocation type used.
@@ -926,7 +879,7 @@
   // A vector of pool-allocated x86 labels for Local labels.
   LabelVector LocalLabels;
 
-  Label *GetOrCreateLabel(SizeT Number, LabelVector &Labels);
+  Label *getOrCreateLabel(SizeT Number, LabelVector &Labels);
 
   // The arith_int() methods factor out the commonality between the encodings of
   // add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp().  The Tag
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index 0661c9f..2f9cda8 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -31,18 +31,18 @@
 AssemblerX86Base<Machine>::~AssemblerX86Base<Machine>() {
   if (BuildDefs::asserts()) {
     for (const Label *Label : CfgNodeLabels) {
-      Label->FinalCheck();
+      Label->finalCheck();
     }
     for (const Label *Label : LocalLabels) {
-      Label->FinalCheck();
+      Label->finalCheck();
     }
   }
 }
 
 template <class Machine> void AssemblerX86Base<Machine>::alignFunction() {
-  SizeT Align = 1 << getBundleAlignLog2Bytes();
+  const SizeT Align = 1 << getBundleAlignLog2Bytes();
   SizeT BytesNeeded = Utils::OffsetToAlignment(Buffer.getPosition(), Align);
-  const SizeT HltSize = 1;
+  constexpr SizeT HltSize = 1;
   while (BytesNeeded > 0) {
     hlt();
     BytesNeeded -= HltSize;
@@ -50,7 +50,7 @@
 }
 
 template <class Machine>
-Label *AssemblerX86Base<Machine>::GetOrCreateLabel(SizeT Number,
+Label *AssemblerX86Base<Machine>::getOrCreateLabel(SizeT Number,
                                                    LabelVector &Labels) {
   Label *L = nullptr;
   if (Number == Labels.size()) {
@@ -70,25 +70,25 @@
 }
 
 template <class Machine>
-Label *AssemblerX86Base<Machine>::GetOrCreateCfgNodeLabel(SizeT NodeNumber) {
-  return GetOrCreateLabel(NodeNumber, CfgNodeLabels);
+Label *AssemblerX86Base<Machine>::getOrCreateCfgNodeLabel(SizeT NodeNumber) {
+  return getOrCreateLabel(NodeNumber, CfgNodeLabels);
 }
 
 template <class Machine>
-Label *AssemblerX86Base<Machine>::GetOrCreateLocalLabel(SizeT Number) {
-  return GetOrCreateLabel(Number, LocalLabels);
+Label *AssemblerX86Base<Machine>::getOrCreateLocalLabel(SizeT Number) {
+  return getOrCreateLabel(Number, LocalLabels);
 }
 
 template <class Machine>
 void AssemblerX86Base<Machine>::bindCfgNodeLabel(SizeT NodeNumber) {
   assert(!getPreliminary());
-  Label *L = GetOrCreateCfgNodeLabel(NodeNumber);
+  Label *L = getOrCreateCfgNodeLabel(NodeNumber);
   this->bind(L);
 }
 
 template <class Machine>
-void AssemblerX86Base<Machine>::BindLocalLabel(SizeT Number) {
-  Label *L = GetOrCreateLocalLabel(Number);
+void AssemblerX86Base<Machine>::bindLocalLabel(SizeT Number) {
+  Label *L = getOrCreateLocalLabel(Number);
   if (!getPreliminary())
     this->bind(L);
 }
@@ -2949,10 +2949,10 @@
 void AssemblerX86Base<Machine>::j(typename Traits::Cond::BrCond condition,
                                   Label *label, bool near) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (label->IsBound()) {
+  if (label->isBound()) {
     static const int kShortSize = 2;
     static const int kLongSize = 6;
-    intptr_t offset = label->Position() - Buffer.size();
+    intptr_t offset = label->getPosition() - Buffer.size();
     assert(offset <= 0);
     if (Utils::IsInt(8, offset - kShortSize)) {
       // TODO(stichnot): Here and in jmp(), we may need to be more
@@ -3000,10 +3000,10 @@
 template <class Machine>
 void AssemblerX86Base<Machine>::jmp(Label *label, bool near) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (label->IsBound()) {
+  if (label->isBound()) {
     static const int kShortSize = 2;
     static const int kLongSize = 5;
-    intptr_t offset = label->Position() - Buffer.size();
+    intptr_t offset = label->getPosition() - Buffer.size();
     assert(offset <= 0);
     if (Utils::IsInt(8, offset - kShortSize)) {
       emitUint8(0xEB);
@@ -3133,20 +3133,20 @@
 
 template <class Machine> void AssemblerX86Base<Machine>::bind(Label *label) {
   intptr_t bound = Buffer.size();
-  assert(!label->IsBound()); // Labels can only be bound once.
-  while (label->IsLinked()) {
-    intptr_t position = label->LinkPosition();
+  assert(!label->isBound()); // Labels can only be bound once.
+  while (label->isLinked()) {
+    intptr_t position = label->getLinkPosition();
     intptr_t next = Buffer.load<int32_t>(position);
     Buffer.store<int32_t>(position, bound - (position + 4));
-    label->position_ = next;
+    label->Position = next;
   }
-  while (label->HasNear()) {
-    intptr_t position = label->NearPosition();
+  while (label->hasNear()) {
+    intptr_t position = label->getNearPosition();
     intptr_t offset = bound - (position + 1);
     assert(Utils::IsInt(8, offset));
     Buffer.store<int8_t>(position, offset);
   }
-  label->BindTo(bound);
+  label->bindTo(bound);
 }
 
 template <class Machine>
@@ -3222,8 +3222,8 @@
 template <class Machine>
 void AssemblerX86Base<Machine>::emitLabel(Label *label,
                                           intptr_t instruction_size) {
-  if (label->IsBound()) {
-    intptr_t offset = label->Position() - Buffer.size();
+  if (label->isBound()) {
+    intptr_t offset = label->getPosition() - Buffer.size();
     assert(offset <= 0);
     emitInt32(offset - instruction_size);
   } else {
@@ -3233,20 +3233,20 @@
 
 template <class Machine>
 void AssemblerX86Base<Machine>::emitLabelLink(Label *Label) {
-  assert(!Label->IsBound());
+  assert(!Label->isBound());
   intptr_t Position = Buffer.size();
-  emitInt32(Label->position_);
+  emitInt32(Label->Position);
   if (!getPreliminary())
-    Label->LinkTo(Position);
+    Label->linkTo(Position);
 }
 
 template <class Machine>
 void AssemblerX86Base<Machine>::emitNearLabelLink(Label *label) {
-  assert(!label->IsBound());
+  assert(!label->isBound());
   intptr_t position = Buffer.size();
   emitUint8(0);
   if (!getPreliminary())
-    label->NearLinkTo(position);
+    label->nearLinkTo(position);
 }
 
 template <class Machine>
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 99ed0bc..9e5df99 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -568,6 +568,12 @@
   }
 }
 
+void Cfg::markNodesForSandboxing() {
+  for (const InstJumpTable *JT : JumpTables)
+    for (SizeT I = 0; I < JT->getNumTargets(); ++I)
+      JT->getTarget(I)->setNeedsAlignment();
+}
+
 // ======================== Dump routines ======================== //
 
 // emitTextHeader() is not target-specific (apart from what is
@@ -585,7 +591,7 @@
     Str << "\t.globl\t" << MangledName << "\n";
     Str << "\t.type\t" << MangledName << ",%function\n";
   }
-  Str << "\t" << Asm->getNonExecPadDirective() << " "
+  Str << "\t" << Asm->getAlignDirective() << " "
       << Asm->getBundleAlignLog2Bytes() << ",0x";
   for (uint8_t I : Asm->getNonExecBundlePadding())
     Str.write_hex(I);
@@ -593,6 +599,40 @@
   Str << MangledName << ":\n";
 }
 
+void Cfg::deleteJumpTableInsts() {
+  for (InstJumpTable *JumpTable : JumpTables)
+    JumpTable->setDeleted();
+}
+
+void Cfg::emitJumpTables() {
+  switch (Ctx->getFlags().getOutFileType()) {
+  case FT_Elf:
+  case FT_Iasm: {
+    // The emission needs to be delayed until the after the text section so save
+    // the offsets in the global context.
+    IceString MangledName = Ctx->mangleName(getFunctionName());
+    for (const InstJumpTable *JumpTable : JumpTables) {
+      SizeT NumTargets = JumpTable->getNumTargets();
+      JumpTableData &JT =
+          Ctx->addJumpTable(MangledName, JumpTable->getId(), NumTargets);
+      for (SizeT I = 0; I < NumTargets; ++I) {
+        SizeT Index = JumpTable->getTarget(I)->getIndex();
+        JT.pushTarget(
+            getAssembler()->getOrCreateCfgNodeLabel(Index)->getPosition());
+      }
+    }
+  } break;
+  case FT_Asm: {
+    // Emit the assembly directly, so we don't need to hang on to all the names.
+    for (const InstJumpTable *JumpTable : JumpTables)
+      getTarget()->emitJumpTable(this, JumpTable);
+  } break;
+  default:
+    llvm::report_fatal_error("Invalid out file type.");
+    break;
+  }
+}
+
 void Cfg::emit() {
   if (!BuildDefs::dump())
     return;
@@ -605,10 +645,20 @@
   }
   OstreamLocker L(Ctx);
   Ostream &Str = Ctx->getStrEmit();
-  IceString MangledName = getContext()->mangleName(getFunctionName());
-  emitTextHeader(MangledName, Ctx, getAssembler<>());
-  for (CfgNode *Node : Nodes)
+  IceString MangledName = Ctx->mangleName(getFunctionName());
+  const Assembler *Asm = getAssembler<>();
+  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
+
+  emitTextHeader(MangledName, Ctx, Asm);
+  deleteJumpTableInsts();
+  for (CfgNode *Node : Nodes) {
+    if (NeedSandboxing && Node->needsAlignment()) {
+      Str << "\t" << Asm->getAlignDirective() << " "
+          << Asm->getBundleAlignLog2Bytes() << "\n";
+    }
     Node->emit(this);
+  }
+  emitJumpTables();
   Str << "\n";
 }
 
@@ -616,8 +666,14 @@
   TimerMarker T(TimerStack::TT_emit, this);
   // The emitIAS() routines emit into the internal assembler buffer,
   // so there's no need to lock the streams.
-  for (CfgNode *Node : Nodes)
+  deleteJumpTableInsts();
+  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
+  for (CfgNode *Node : Nodes) {
+    if (NeedSandboxing && Node->needsAlignment())
+      getAssembler()->alignCfgNode();
     Node->emitIAS(this);
+  }
+  emitJumpTables();
 }
 
 // Dumps the IR with an optional introductory message.
diff --git a/src/IceCfg.h b/src/IceCfg.h
index 08b4e97..f2b5295 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -140,6 +140,13 @@
   const VarList &getImplicitArgs() const { return ImplicitArgs; }
   /// @}
 
+  /// \name Manage the jump tables.
+  /// @{
+  void addJumpTable(InstJumpTable *JumpTable) {
+    JumpTables.emplace_back(JumpTable);
+  }
+  /// @}
+
   /// \name Miscellaneous accessors.
   /// @{
   TargetLowering *getTarget() const { return Target.get(); }
@@ -183,6 +190,7 @@
   bool validateLiveness() const;
   void contractEmptyNodes();
   void doBranchOpt();
+  void markNodesForSandboxing();
 
   /// \name  Manage the CurrentNode field.
   /// CurrentNode is used for validating the Variable::DefNode field during
@@ -229,6 +237,12 @@
   /// code needs to be defined.
   void profileBlocks();
 
+  /// Delete registered jump table placeholder instructions. This should only be
+  /// called once all repointing has taken place.
+  void deleteJumpTableInsts();
+  /// Iterate through the registered jump tables and emit them.
+  void emitJumpTables();
+
   GlobalContext *Ctx;
   uint32_t SequenceNumber; /// output order for emission
   VerboseMask VMask;
@@ -252,6 +266,7 @@
   std::unique_ptr<Assembler> TargetAssembler;
   /// Globals required by this CFG. Mostly used for the profiler's globals.
   std::unique_ptr<VariableDeclarationList> GlobalInits;
+  std::vector<InstJumpTable *> JumpTables;
 
   /// CurrentNode is maintained during dumping/emitting just for
   /// validating Variable::DefNode.  Normally, a traversal over
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index 4f3d7d0..e909690 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -54,6 +54,9 @@
   void setNeedsPlacement(bool Value) { NeedsPlacement = Value; }
   bool needsPlacement() const { return NeedsPlacement; }
 
+  void setNeedsAlignment() { NeedsAlignment = true; }
+  bool needsAlignment() const { return NeedsAlignment; }
+
   /// \name Access predecessor and successor edge lists.
   /// @{
   const NodeList &getInEdges() const { return InEdges; }
@@ -110,6 +113,7 @@
       Cfg::IdentifierIndexInvalid; /// index into Cfg::NodeNames table
   bool HasReturn = false;          /// does this block need an epilog?
   bool NeedsPlacement = false;
+  bool NeedsAlignment = false;       /// is sandboxing required?
   InstNumberT InstCountEstimate = 0; /// rough instruction count estimate
   NodeList InEdges;                  /// in no particular order
   NodeList OutEdges;                 /// in no particular order
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp
index cbacd8b..5a4abe6 100644
--- a/src/IceClFlags.cpp
+++ b/src/IceClFlags.cpp
@@ -176,10 +176,6 @@
     TranslateOnly("translate-only",
                   cl::desc("Translate only the given function"), cl::init(""));
 
-cl::opt<bool>
-    UseAdvancedSwitchLowering("adv-switch",
-                              cl::desc("Use advanced switch lowering"));
-
 cl::opt<bool> UseSandboxing("sandbox", cl::desc("Use sandboxing"));
 
 cl::opt<std::string> VerboseFocusOn(
@@ -351,7 +347,6 @@
   OutFlags.SkipUnimplemented = false;
   OutFlags.SubzeroTimingEnabled = false;
   OutFlags.TimeEachFunction = false;
-  OutFlags.UseAdvancedSwitchLowering = false;
   OutFlags.UseSandboxing = false;
   // Enum and integer fields.
   OutFlags.Opt = Opt_m1;
@@ -413,7 +408,6 @@
   OutFlags.setTimeEachFunction(::TimeEachFunction);
   OutFlags.setTimingFocusOn(::TimingFocusOn);
   OutFlags.setTranslateOnly(::TranslateOnly);
-  OutFlags.setUseAdvancedSwitchLowering(::UseAdvancedSwitchLowering);
   OutFlags.setUseSandboxing(::UseSandboxing);
   OutFlags.setVerboseFocusOn(::VerboseFocusOn);
   OutFlags.setOutFileType(::OutFileType);
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index 97420ea..549bf16 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -103,13 +103,6 @@
   }
   void setTimeEachFunction(bool NewValue) { TimeEachFunction = NewValue; }
 
-  bool getUseAdvancedSwitchLowering() const {
-    return UseAdvancedSwitchLowering;
-  }
-  void setUseAdvancedSwitchLowering(bool NewValue) {
-    UseAdvancedSwitchLowering = NewValue;
-  }
-
   bool getUseSandboxing() const { return UseSandboxing; }
   void setUseSandboxing(bool NewValue) { UseSandboxing = NewValue; }
 
@@ -242,7 +235,6 @@
   bool SkipUnimplemented;
   bool SubzeroTimingEnabled;
   bool TimeEachFunction;
-  bool UseAdvancedSwitchLowering;
   bool UseSandboxing;
 
   OptLevel Opt;
diff --git a/src/IceCompiler.cpp b/src/IceCompiler.cpp
index 59f18d2..23733ce 100644
--- a/src/IceCompiler.cpp
+++ b/src/IceCompiler.cpp
@@ -154,6 +154,7 @@
     Ctx.lowerGlobals("last");
     Ctx.lowerProfileData();
     Ctx.lowerConstants();
+    Ctx.lowerJumpTables();
 
     if (Ctx.getFlags().getOutFileType() == FT_Elf) {
       TimerMarker T1(Ice::TimerStack::TT_emit, &Ctx);
diff --git a/src/IceDefs.h b/src/IceDefs.h
index 4e1fb3a..374b4b3 100644
--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -58,7 +58,9 @@
 class GlobalDeclaration;
 class Inst;
 class InstAssign;
+class InstJumpTable;
 class InstPhi;
+class InstSwitch;
 class InstTarget;
 class LiveRange;
 class Liveness;
diff --git a/src/IceELFObjectWriter.cpp b/src/IceELFObjectWriter.cpp
index 37b1d58..f32a2b6 100644
--- a/src/IceELFObjectWriter.cpp
+++ b/src/IceELFObjectWriter.cpp
@@ -20,6 +20,7 @@
 #include "IceELFStreamer.h"
 #include "IceGlobalContext.h"
 #include "IceGlobalInits.h"
+#include "IceInst.h"
 #include "IceOperand.h"
 #include "llvm/Support/MathExtras.h"
 
@@ -553,6 +554,44 @@
   writeRelocationSections(RelRODataSections);
 }
 
+void ELFObjectWriter::writeJumpTable(const JumpTableData &JT,
+                                     FixupKind RelocationKind) {
+  ELFDataSection *Section;
+  ELFRelocationSection *RelSection;
+  const Elf64_Xword PointerSize = typeWidthInBytes(getPointerType());
+  const Elf64_Xword ShAddralign = PointerSize;
+  const Elf64_Xword ShEntsize = PointerSize;
+  const IceString SectionName =
+      MangleSectionName(".rodata", JT.getFunctionName() + "$jumptable");
+  Section = createSection<ELFDataSection>(SectionName, SHT_PROGBITS, SHF_ALLOC,
+                                          ShAddralign, ShEntsize);
+  Section->setFileOffset(alignFileOffset(ShAddralign));
+  RODataSections.push_back(Section);
+  RelSection = createRelocationSection(Section);
+  RelRODataSections.push_back(RelSection);
+
+  const uint8_t SymbolType = STT_OBJECT;
+  Section->padToAlignment(Str, PointerSize);
+  bool IsExternal = Ctx.getFlags().getDisableInternal();
+  const uint8_t SymbolBinding = IsExternal ? STB_GLOBAL : STB_LOCAL;
+  IceString JumpTableName =
+      InstJumpTable::makeName(JT.getFunctionName(), JT.getId());
+  SymTab->createDefinedSym(JumpTableName, SymbolType, SymbolBinding, Section,
+                           Section->getCurrentSize(), PointerSize);
+  StrTab->add(JumpTableName);
+
+  for (intptr_t TargetOffset : JT.getTargetOffsets()) {
+    AssemblerFixup NewFixup;
+    NewFixup.set_position(Section->getCurrentSize());
+    NewFixup.set_kind(RelocationKind);
+    constexpr bool SuppressMangling = true;
+    NewFixup.set_value(Ctx.getConstantSym(TargetOffset, JT.getFunctionName(),
+                                          SuppressMangling));
+    RelSection->addRelocation(NewFixup);
+    Section->appendRelocationOffset(Str, RelSection->isRela(), TargetOffset);
+  }
+}
+
 void ELFObjectWriter::setUndefinedSyms(const ConstantList &UndefSyms) {
   for (const Constant *S : UndefSyms) {
     const auto Sym = llvm::cast<ConstantRelocatable>(S);
diff --git a/src/IceELFObjectWriter.h b/src/IceELFObjectWriter.h
index c2d7157..f5ae7b8 100644
--- a/src/IceELFObjectWriter.h
+++ b/src/IceELFObjectWriter.h
@@ -76,6 +76,9 @@
   /// fills the symbol table with labels for each constant pool entry.
   template <typename ConstType> void writeConstantPool(Type Ty);
 
+  /// Write a jump table and register fixups for the target addresses.
+  void writeJumpTable(const JumpTableData &JT, FixupKind RelocationKind);
+
   /// Populate the symbol table with a list of external/undefined symbols.
   void setUndefinedSyms(const ConstantList &UndefSyms);
 
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 51ca3f0..8bfc5dd 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -385,6 +385,8 @@
 
 void GlobalContext::lowerConstants() { DataLowering->lowerConstants(); }
 
+void GlobalContext::lowerJumpTables() { DataLowering->lowerJumpTables(); }
+
 void GlobalContext::lowerGlobals(const IceString &SectionSuffix) {
   TimerMarker T(TimerStack::TT_emitGlobalInitializers, this);
   const bool DumpGlobalVariables = BuildDefs::dump() && Flags.getVerbose() &&
@@ -876,6 +878,13 @@
   return getConstPool()->ExternRelocatables.getConstantPool();
 }
 
+JumpTableData &GlobalContext::addJumpTable(IceString FuncName, SizeT Id,
+                                           SizeT NumTargets) {
+  auto JumpTables = getJumpTables();
+  JumpTables->emplace_back(FuncName, Id, NumTargets);
+  return JumpTables->back();
+}
+
 TimerStackIdT GlobalContext::newTimerStackID(const IceString &Name) {
   if (!BuildDefs::dump())
     return 0;
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index d19249d..acef4c5 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -20,6 +20,7 @@
 #include "IceClFlags.h"
 #include "IceIntrinsics.h"
 #include "IceRNG.h"
+#include "IceSwitchLowering.h"
 #include "IceThreading.h"
 #include "IceTimerTree.h"
 #include "IceTypes.h"
@@ -55,6 +56,7 @@
   }
   ~LockedPtr() { Lock->unlock(); }
   T *operator->() const { return Value; }
+  T &operator*() const { return *Value; }
 
 private:
   T *Value;
@@ -209,6 +211,13 @@
   /// Returns a copy of the list of external symbols.
   ConstantList getConstantExternSyms();
 
+  /// Return a locked pointer to the registered jump tables.
+  LockedPtr<JumpTableDataList> getJumpTables() {
+    return LockedPtr<JumpTableDataList>(&JumpTables, &JumpTablesLock);
+  }
+  /// Create a new jump table entry and return a reference to it.
+  JumpTableData &addJumpTable(IceString FuncName, SizeT Id, SizeT NumTargets);
+
   const ClFlags &getFlags() const { return Flags; }
 
   bool isIRGenerationDisabled() const {
@@ -335,6 +344,8 @@
 
   void lowerConstants();
 
+  void lowerJumpTables();
+
   void emitQueueBlockingPush(EmitterWorkItem *Item);
   EmitterWorkItem *emitQueueBlockingPop();
   void emitQueueNotifyEnd() { EmitQ.notifyEnd(); }
@@ -456,6 +467,11 @@
   std::unique_ptr<ConstantPool> ConstPool;
 
   ICE_CACHELINE_BOUNDARY;
+  // Managed by getJumpTables()
+  GlobalLockType JumpTablesLock;
+  JumpTableDataList JumpTables;
+
+  ICE_CACHELINE_BOUNDARY;
   // Managed by getErrorStatus()
   GlobalLockType ErrorStatusLock;
   ErrorCode ErrorStatus;
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index 7f49835..900b408 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -516,8 +516,7 @@
 
 InstJumpTable::InstJumpTable(Cfg *Func, SizeT NumTargets, CfgNode *Default)
     : InstHighLevel(Func, Inst::JumpTable, 1, nullptr),
-      LabelNumber(Func->getTarget()->makeNextLabelNumber()),
-      NumTargets(NumTargets) {
+      Id(Func->getTarget()->makeNextJumpTableNumber()), NumTargets(NumTargets) {
   Targets = Func->allocateArrayOf<CfgNode *>(NumTargets);
   for (SizeT I = 0; I < NumTargets; ++I)
     Targets[I] = Default;
@@ -534,11 +533,6 @@
   return Found;
 }
 
-IceString InstJumpTable::getName(const Cfg *Func) const {
-  return ".L" + Func->getFunctionName() + "$jumptable$__" +
-         std::to_string(LabelNumber);
-}
-
 Type InstCall::getReturnType() const {
   if (Dest == nullptr)
     return IceType_void;
@@ -952,29 +946,6 @@
   Str << "kill.pseudo scratch_regs";
 }
 
-void InstJumpTable::emit(const Cfg *Func) const {
-  // TODO(ascull): should this be a target specific lowering (with access built
-  // in?) and just have InstJumpTable as a high level, similar to br? or should
-  // this follow the same path as emitIAS i.e. put it in global context and
-  // produce this code later?
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  // TODO(ascull): softcode pointer size of 4
-  // TODO(ascull): is .long portable?
-  Str << "\n\t.section\t.rodata." << Func->getFunctionName()
-      << "$jumptable,\"a\",@progbits\n"
-      << "\t.align 4\n" << getName(Func) << ":";
-  for (SizeT I = 0; I < NumTargets; ++I)
-    Str << "\n\t.long\t" << Targets[I]->getAsmName();
-  Str << "\n\n\t.text";
-}
-
-void InstJumpTable::emitIAS(const Cfg *Func) const {
-  // TODO(ascull): put jump table in the global context for emission later
-  (void)Func;
-}
-
 void InstJumpTable::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
diff --git a/src/IceInst.h b/src/IceInst.h
index 48b18f4..6bfd7af 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -914,17 +914,24 @@
     return new (Func->allocate<InstJumpTable>())
         InstJumpTable(Func, NumTargets, Default);
   }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void addTarget(SizeT TargetIndex, CfgNode *Target) {
     assert(TargetIndex < NumTargets);
     Targets[TargetIndex] = Target;
   }
   bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override;
-  IceString getName(const Cfg *Func) const;
+  SizeT getId() const { return Id; }
+  SizeT getNumTargets() const { return NumTargets; }
+  CfgNode *getTarget(SizeT I) const {
+    assert(I < NumTargets);
+    return Targets[I];
+  }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return Inst->getKind() == JumpTable; }
 
+  static IceString makeName(const IceString &FuncName, SizeT Id) {
+    return ".L" + FuncName + "$jumptable$__" + std::to_string(Id);
+  }
+
 private:
   InstJumpTable(Cfg *Func, SizeT NumTargets, CfgNode *Default);
   void destroy(Cfg *Func) override {
@@ -932,7 +939,7 @@
     Inst::destroy(Func);
   }
 
-  const SizeT LabelNumber;
+  const SizeT Id;
   const SizeT NumTargets;
   CfgNode **Targets;
 };
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index a4830ef..3b067cf 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -294,47 +294,53 @@
   InstX86Br &operator=(const InstX86Br &) = delete;
 
 public:
+  enum Mode { Near, Far };
+
   /// Create a conditional branch to a node.
   static InstX86Br *
   create(Cfg *Func, CfgNode *TargetTrue, CfgNode *TargetFalse,
-         typename InstX86Base<Machine>::Traits::Cond::BrCond Condition) {
+         typename InstX86Base<Machine>::Traits::Cond::BrCond Condition,
+         Mode Kind) {
     assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None);
     const InstX86Label<Machine> *NoLabel = nullptr;
     return new (Func->allocate<InstX86Br>())
-        InstX86Br(Func, TargetTrue, TargetFalse, NoLabel, Condition);
+        InstX86Br(Func, TargetTrue, TargetFalse, NoLabel, Condition, Kind);
   }
   /// Create an unconditional branch to a node.
-  static InstX86Br *create(Cfg *Func, CfgNode *Target) {
+  static InstX86Br *create(Cfg *Func, CfgNode *Target, Mode Kind) {
     const CfgNode *NoCondTarget = nullptr;
     const InstX86Label<Machine> *NoLabel = nullptr;
     return new (Func->allocate<InstX86Br>())
         InstX86Br(Func, NoCondTarget, Target, NoLabel,
-                  InstX86Base<Machine>::Traits::Cond::Br_None);
+                  InstX86Base<Machine>::Traits::Cond::Br_None, Kind);
   }
   /// Create a non-terminator conditional branch to a node, with a
   /// fallthrough to the next instruction in the current node.  This is
   /// used for switch lowering.
   static InstX86Br *
   create(Cfg *Func, CfgNode *Target,
-         typename InstX86Base<Machine>::Traits::Cond::BrCond Condition) {
+         typename InstX86Base<Machine>::Traits::Cond::BrCond Condition,
+         Mode Kind) {
     assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None);
     const CfgNode *NoUncondTarget = nullptr;
     const InstX86Label<Machine> *NoLabel = nullptr;
     return new (Func->allocate<InstX86Br>())
-        InstX86Br(Func, Target, NoUncondTarget, NoLabel, Condition);
+        InstX86Br(Func, Target, NoUncondTarget, NoLabel, Condition, Kind);
   }
   /// Create a conditional intra-block branch (or unconditional, if
   /// Condition==Br_None) to a label in the current block.
   static InstX86Br *
   create(Cfg *Func, InstX86Label<Machine> *Label,
-         typename InstX86Base<Machine>::Traits::Cond::BrCond Condition) {
+         typename InstX86Base<Machine>::Traits::Cond::BrCond Condition,
+         Mode Kind) {
     const CfgNode *NoCondTarget = nullptr;
     const CfgNode *NoUncondTarget = nullptr;
     return new (Func->allocate<InstX86Br>())
-        InstX86Br(Func, NoCondTarget, NoUncondTarget, Label, Condition);
+        InstX86Br(Func, NoCondTarget, NoUncondTarget, Label, Condition, Kind);
   }
   const CfgNode *getTargetTrue() const { return TargetTrue; }
   const CfgNode *getTargetFalse() const { return TargetFalse; }
+  bool isNear() const { return Kind == Near; }
   bool optimizeBranch(const CfgNode *NextNode);
   uint32_t getEmitInstCount() const override {
     uint32_t Sum = 0;
@@ -360,12 +366,14 @@
 private:
   InstX86Br(Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
             const InstX86Label<Machine> *Label,
-            typename InstX86Base<Machine>::Traits::Cond::BrCond Condition);
+            typename InstX86Base<Machine>::Traits::Cond::BrCond Condition,
+            Mode Kind);
 
   typename InstX86Base<Machine>::Traits::Cond::BrCond Condition;
   const CfgNode *TargetTrue;
   const CfgNode *TargetFalse;
   const InstX86Label<Machine> *Label; // Intra-block branch target
+  const Mode Kind;
 };
 
 /// Jump to a target outside this function, such as tailcall, nacljump,
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index f80073e..137e295 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -105,10 +105,10 @@
 InstX86Br<Machine>::InstX86Br(
     Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
     const InstX86Label<Machine> *Label,
-    typename InstX86Base<Machine>::Traits::Cond::BrCond Condition)
+    typename InstX86Base<Machine>::Traits::Cond::BrCond Condition, Mode Kind)
     : InstX86Base<Machine>(Func, InstX86Base<Machine>::Br, 0, nullptr),
       Condition(Condition), TargetTrue(TargetTrue), TargetFalse(TargetFalse),
-      Label(Label) {}
+      Label(Label), Kind(Kind) {}
 
 template <class Machine>
 bool InstX86Br<Machine>::optimizeBranch(const CfgNode *NextNode) {
@@ -404,7 +404,7 @@
 void InstX86Label<Machine>::emitIAS(const Cfg *Func) const {
   typename InstX86Base<Machine>::Traits::Assembler *Asm =
       Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
-  Asm->BindLocalLabel(Number);
+  Asm->bindLocalLabel(Number);
 }
 
 template <class Machine>
@@ -446,31 +446,26 @@
   typename InstX86Base<Machine>::Traits::Assembler *Asm =
       Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
   if (Label) {
-    class Label *L = Asm->GetOrCreateLocalLabel(Label->getNumber());
-    // In all these cases, local Labels should only be used for Near.
-    const bool Near = true;
+    class Label *L = Asm->getOrCreateLocalLabel(Label->getNumber());
     if (Condition == InstX86Base<Machine>::Traits::Cond::Br_None) {
-      Asm->jmp(L, Near);
+      Asm->jmp(L, isNear());
     } else {
-      Asm->j(Condition, L, Near);
+      Asm->j(Condition, L, isNear());
     }
   } else {
-    // Pessimistically assume it's far. This only affects Labels that
-    // are not Bound.
-    const bool Near = false;
     if (Condition == InstX86Base<Machine>::Traits::Cond::Br_None) {
       class Label *L =
-          Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
+          Asm->getOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
       assert(!getTargetTrue());
-      Asm->jmp(L, Near);
+      Asm->jmp(L, isNear());
     } else {
       class Label *L =
-          Asm->GetOrCreateCfgNodeLabel(getTargetTrue()->getIndex());
-      Asm->j(Condition, L, Near);
+          Asm->getOrCreateCfgNodeLabel(getTargetTrue()->getIndex());
+      Asm->j(Condition, L, isNear());
       if (getTargetFalse()) {
         class Label *L2 =
-            Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
-        Asm->jmp(L2, Near);
+            Asm->getOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
+        Asm->jmp(L2, isNear());
       }
     }
   }
@@ -498,6 +493,8 @@
       Str << ", label %" << getTargetFalse()->getName();
     }
   }
+
+  Str << " // (" << (isNear() ? "near" : "far") << " jump)";
 }
 
 template <class Machine> void InstX86Jmp<Machine>::emit(const Cfg *Func) const {
diff --git a/src/IceSwitchLowering.cpp b/src/IceSwitchLowering.cpp
index 37e67c1..53f7890 100644
--- a/src/IceSwitchLowering.cpp
+++ b/src/IceSwitchLowering.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 #include "IceSwitchLowering.h"
 
+#include "IceCfgNode.h"
 #include "IceTargetLowering.h"
 
 #include <algorithm>
@@ -79,9 +80,11 @@
     // Case.High could be UINT64_MAX which makes the loop awkward. Unwrap the
     // last iteration to avoid wrap around problems.
     for (uint64_t I = Case.Low; I < Case.High; ++I)
-      JumpTable->addTarget(I - MinValue, Case.Label);
-    JumpTable->addTarget(Case.High - MinValue, Case.Label);
+      JumpTable->addTarget(I - MinValue, Case.Target);
+    JumpTable->addTarget(Case.High - MinValue, Case.Target);
+    Case.Target->setNeedsAlignment();
   }
+  Func->addJumpTable(JumpTable);
 
   CaseClusters.clear();
   CaseClusters.emplace_back(MinValue, MaxValue, JumpTable);
@@ -91,7 +94,7 @@
 
 bool CaseCluster::tryAppend(const CaseCluster &New) {
   // Can only append ranges with the same target and are adjacent
-  bool CanAppend = this->Label == New.Label && this->High + 1 == New.Low;
+  bool CanAppend = this->Target == New.Target && this->High + 1 == New.Low;
   if (CanAppend)
     this->High = New.High;
   return CanAppend;
diff --git a/src/IceSwitchLowering.h b/src/IceSwitchLowering.h
index 5dfcec3..34c9373 100644
--- a/src/IceSwitchLowering.h
+++ b/src/IceSwitchLowering.h
@@ -14,8 +14,7 @@
 #ifndef SUBZERO_SRC_ICESWITCHLOWERING_H
 #define SUBZERO_SRC_ICESWITCHLOWERING_H
 
-#include "IceCfgNode.h"
-#include "IceInst.h"
+#include "IceDefs.h"
 
 namespace Ice {
 
@@ -38,8 +37,8 @@
   CaseCluster &operator=(const CaseCluster &) = default;
 
   /// Create a cluster of a single case represented by a unitary range.
-  CaseCluster(uint64_t Value, CfgNode *Label)
-      : Kind(Range), Low(Value), High(Value), Label(Label) {}
+  CaseCluster(uint64_t Value, CfgNode *Target)
+      : Kind(Range), Low(Value), High(Value), Target(Target) {}
   /// Create a case consisting of a jump table.
   CaseCluster(uint64_t Low, uint64_t High, InstJumpTable *JT)
       : Kind(JumpTable), Low(Low), High(High), JT(JT) {}
@@ -47,15 +46,18 @@
   CaseClusterKind getKind() const { return Kind; }
   uint64_t getLow() const { return Low; }
   uint64_t getHigh() const { return High; }
-  CfgNode *getLabel() const {
+  CfgNode *getTarget() const {
     assert(Kind == Range);
-    return Label;
+    return Target;
   }
   InstJumpTable *getJumpTable() const {
     assert(Kind == JumpTable);
     return JT;
   }
 
+  bool isUnitRange() const { return Low == High; }
+  bool isPairRange() const { return Low == High - 1; }
+
   /// Discover cases which can be clustered together and return the clusters
   /// ordered by case value.
   static CaseClusterArray clusterizeSwitch(Cfg *Func, const InstSwitch *Inst);
@@ -65,7 +67,7 @@
   uint64_t Low;
   uint64_t High;
   union {
-    CfgNode *Label;    /// Target for a range.
+    CfgNode *Target;   /// Target for a range.
     InstJumpTable *JT; /// Jump table targets.
   };
 
@@ -73,6 +75,36 @@
   bool tryAppend(const CaseCluster &New);
 };
 
+/// Store the jump table data so that it can be emitted later in the correct
+/// ELF section once the offsets from the start of the function are known.
+class JumpTableData {
+  JumpTableData() = delete;
+  JumpTableData(const JumpTableData &) = delete;
+  JumpTableData &operator=(const JumpTableData &) = delete;
+
+public:
+  JumpTableData(IceString FuncName, SizeT Id, SizeT NumTargets)
+      : FuncName(FuncName), Id(Id) {
+    TargetOffsets.reserve(NumTargets);
+  }
+  JumpTableData(JumpTableData &&) = default;
+
+  void pushTarget(intptr_t Offset) { TargetOffsets.emplace_back(Offset); }
+
+  const IceString &getFunctionName() const { return FuncName; }
+  SizeT getId() const { return Id; }
+  const std::vector<intptr_t> &getTargetOffsets() const {
+    return TargetOffsets;
+  }
+
+private:
+  const IceString FuncName;
+  const SizeT Id;
+  std::vector<intptr_t> TargetOffsets;
+};
+
+using JumpTableDataList = std::vector<JumpTableData>;
+
 } // end of namespace Ice
 
 #endif //  SUBZERO_SRC_ICESWITCHLOWERING_H
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 018a555..c7ef918 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -177,6 +177,7 @@
   void updateStackAdjustment(int32_t Offset) { StackAdjustment += Offset; }
   void resetStackAdjustment() { StackAdjustment = 0; }
   SizeT makeNextLabelNumber() { return NextLabelNumber++; }
+  SizeT makeNextJumpTableNumber() { return NextJumpTableNumber++; }
   LoweringContext &getContext() { return Context; }
 
   enum RegSet {
@@ -212,6 +213,8 @@
   /// Get the minimum number of clusters required for a jump table to be
   /// considered.
   virtual SizeT getMinJumpTableSize() const = 0;
+  virtual void emitJumpTable(const Cfg *Func,
+                             const InstJumpTable *JumpTable) const = 0;
 
   virtual void emitVariable(const Variable *Var) const = 0;
 
@@ -338,6 +341,7 @@
   /// natural location, as arguments are pushed for a function call.
   int32_t StackAdjustment = 0;
   SizeT NextLabelNumber = 0;
+  SizeT NextJumpTableNumber = 0;
   LoweringContext Context;
 
   // Runtime helper function names
@@ -397,6 +401,7 @@
   virtual void lowerGlobals(const VariableDeclarationList &Vars,
                             const IceString &SectionSuffix) = 0;
   virtual void lowerConstants() = 0;
+  virtual void lowerJumpTables() = 0;
 
 protected:
   void emitGlobal(const VariableDeclaration &Var,
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 0925467..5a87a0b 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -383,6 +383,12 @@
   return Reg;
 }
 
+void TargetARM32::emitJumpTable(const Cfg *Func,
+                                const InstJumpTable *JumpTable) const {
+  (void)JumpTable;
+  UnimplementedError(Func->getContext()->getFlags());
+}
+
 void TargetARM32::emitVariable(const Variable *Var) const {
   Ostream &Str = Ctx->getStrEmit();
   if (Var->hasReg()) {
@@ -2709,6 +2715,12 @@
   UnimplementedError(Ctx->getFlags());
 }
 
+void TargetDataARM32::lowerJumpTables() {
+  if (Ctx->getFlags().getDisableTranslation())
+    return;
+  UnimplementedError(Ctx->getFlags());
+}
+
 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
     : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}
 
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 9727bd9..58deb24 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -80,6 +80,8 @@
 
   // TODO(ascull): what size is best for ARM?
   SizeT getMinJumpTableSize() const override { return 3; }
+  void emitJumpTable(const Cfg *Func,
+                     const InstJumpTable *JumpTable) const override;
 
   void emitVariable(const Variable *Var) const override;
 
@@ -432,6 +434,7 @@
   void lowerGlobals(const VariableDeclarationList &Vars,
                     const IceString &SectionSuffix) override;
   void lowerConstants() override;
+  void lowerJumpTables() override;
 
 protected:
   explicit TargetDataARM32(GlobalContext *Ctx);
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 99c9928..d55fce6 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -254,6 +254,12 @@
   return Reg;
 }
 
+void TargetMIPS32::emitJumpTable(const Cfg *Func,
+                                 const InstJumpTable *JumpTable) const {
+  (void)JumpTable;
+  UnimplementedError(Func->getContext()->getFlags());
+}
+
 void TargetMIPS32::emitVariable(const Variable *Var) const {
   Ostream &Str = Ctx->getStrEmit();
   (void)Var;
@@ -703,6 +709,12 @@
   UnimplementedError(Ctx->getFlags());
 }
 
+void TargetDataMIPS32::lowerJumpTables() {
+  if (Ctx->getFlags().getDisableTranslation())
+    return;
+  UnimplementedError(Ctx->getFlags());
+}
+
 TargetHeaderMIPS32::TargetHeaderMIPS32(GlobalContext *Ctx)
     : TargetHeaderLowering(Ctx) {}
 
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index 5ec9c0b..76a5239 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -56,6 +56,8 @@
 
   // TODO(ascull): what is the best size of MIPS?
   SizeT getMinJumpTableSize() const override { return 3; }
+  void emitJumpTable(const Cfg *Func,
+                     const InstJumpTable *JumpTable) const override;
 
   void emitVariable(const Variable *Var) const override;
 
@@ -145,6 +147,7 @@
   void lowerGlobals(const VariableDeclarationList &Vars,
                     const IceString &SectionSuffix) override;
   void lowerConstants() override;
+  void lowerJumpTables() override;
 
 protected:
   explicit TargetDataMIPS32(GlobalContext *Ctx);
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index dfa042a..e8e89dd 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -151,6 +151,23 @@
 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
 } // end of anonymous namespace
 
+void TargetX8632::emitJumpTable(const Cfg *Func,
+                                const InstJumpTable *JumpTable) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Str = Ctx->getStrEmit();
+  IceString MangledName = Ctx->mangleName(Func->getFunctionName());
+  Str << "\t.section\t.rodata." << MangledName
+      << "$jumptable,\"a\",@progbits\n";
+  Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
+  Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
+
+  // On X8632 pointers are 32-bit hence the use of .long
+  for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
+    Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
+  Str << "\n";
+}
+
 template <typename T>
 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
   if (!BuildDefs::dump())
@@ -222,6 +239,35 @@
   }
 }
 
+void TargetDataX8632::lowerJumpTables() {
+  switch (Ctx->getFlags().getOutFileType()) {
+  case FT_Elf: {
+    ELFObjectWriter *Writer = Ctx->getObjectWriter();
+    for (const JumpTableData &JumpTable : *Ctx->getJumpTables())
+      Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
+  } break;
+  case FT_Asm:
+    // Already emitted from Cfg
+    break;
+  case FT_Iasm: {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Ctx->getStrEmit();
+    for (const JumpTableData &JT : *Ctx->getJumpTables()) {
+      Str << "\t.section\t.rodata." << JT.getFunctionName()
+          << "$jumptable,\"a\",@progbits\n";
+      Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
+      Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
+
+      // On X8632 pointers are 32-bit hence the use of .long
+      for (intptr_t TargetOffset : JT.getTargetOffsets())
+        Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
+      Str << "\n";
+    }
+  } break;
+  }
+}
+
 void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars,
                                    const IceString &SectionSuffix) {
   switch (Ctx->getFlags().getOutFileType()) {
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 1f1d2cb..598e6a1 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -33,6 +33,9 @@
   TargetX8632(const TargetX8632 &) = delete;
   TargetX8632 &operator=(const TargetX8632 &) = delete;
 
+  void emitJumpTable(const Cfg *Func,
+                     const InstJumpTable *JumpTable) const override;
+
 public:
   using X86InstructionSet = X8632::Traits::InstructionSet;
 
@@ -65,6 +68,7 @@
   void lowerGlobals(const VariableDeclarationList &Vars,
                     const IceString &SectionSuffix) override;
   void lowerConstants() override;
+  void lowerJumpTables() override;
 
 protected:
   explicit TargetDataX8632(GlobalContext *Ctx);
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index f2be0c1..b9bbaba 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -17,6 +17,13 @@
 
 namespace Ice {
 
+void TargetX8664::emitJumpTable(const Cfg *Func,
+                                const InstJumpTable *JumpTable) const {
+  (void)Func;
+  (void)JumpTable;
+  llvm::report_fatal_error("Not yet implemented");
+}
+
 TargetX8664 *TargetX8664::create(Cfg *) {
   llvm::report_fatal_error("Not yet implemented");
 }
@@ -29,4 +36,8 @@
   llvm::report_fatal_error("Not yet implemented");
 }
 
+void TargetDataX8664::lowerJumpTables() {
+  llvm::report_fatal_error("Not yet implemented");
+}
+
 } // end of namespace Ice
diff --git a/src/IceTargetLoweringX8664.h b/src/IceTargetLoweringX8664.h
index 791343a..bb00a8a 100644
--- a/src/IceTargetLoweringX8664.h
+++ b/src/IceTargetLoweringX8664.h
@@ -27,6 +27,9 @@
   TargetX8664(const TargetX8664 &) = delete;
   TargetX8664 &operator=(const TargetX8664 &) = delete;
 
+  void emitJumpTable(const Cfg *Func,
+                     const InstJumpTable *JumpTable) const override;
+
 public:
   static TargetX8664 *create(Cfg *Func);
 
@@ -50,6 +53,7 @@
                     const IceString &SectionSuffix) override;
 
   void lowerConstants() override;
+  void lowerJumpTables() override;
 
 private:
   ENABLE_MAKE_UNIQUE;
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 84a1e19..58c425a 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -158,6 +158,8 @@
                       Operand *Val);
   void lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, Operand *FirstVal,
                        Operand *SecondVal);
+  /// Lower an indirect jump adding sandboxing when needed.
+  void lowerIndirectJump(Variable *Target);
 
   /// Check the comparison is in [Min,Max]. The flags register will be modified
   /// with:
@@ -266,18 +268,21 @@
   }
   void _br(typename Traits::Cond::BrCond Condition, CfgNode *TargetTrue,
            CfgNode *TargetFalse) {
-    Context.insert(
-        Traits::Insts::Br::create(Func, TargetTrue, TargetFalse, Condition));
+    Context.insert(Traits::Insts::Br::create(
+        Func, TargetTrue, TargetFalse, Condition, Traits::Insts::Br::Far));
   }
   void _br(CfgNode *Target) {
-    Context.insert(Traits::Insts::Br::create(Func, Target));
+    Context.insert(
+        Traits::Insts::Br::create(Func, Target, Traits::Insts::Br::Far));
   }
   void _br(typename Traits::Cond::BrCond Condition, CfgNode *Target) {
-    Context.insert(Traits::Insts::Br::create(Func, Target, Condition));
+    Context.insert(Traits::Insts::Br::create(Func, Target, Condition,
+                                             Traits::Insts::Br::Far));
   }
   void _br(typename Traits::Cond::BrCond Condition,
-           typename Traits::Insts::Label *Label) {
-    Context.insert(Traits::Insts::Br::create(Func, Label, Condition));
+           typename Traits::Insts::Label *Label,
+           typename Traits::Insts::Br::Mode Kind = Traits::Insts::Br::Near) {
+    Context.insert(Traits::Insts::Br::create(Func, Label, Condition, Kind));
   }
   void _bsf(Variable *Dest, Operand *Src0) {
     Context.insert(Traits::Insts::Bsf::create(Func, Dest, Src0));
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 046d88f..b4c4ea3 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -408,9 +408,12 @@
   Func->dump("After branch optimization");
 
   // Nop insertion
-  if (Ctx->getFlags().shouldDoNopInsertion()) {
+  if (Ctx->getFlags().shouldDoNopInsertion())
     Func->doNopInsertion();
-  }
+
+  // Mark nodes that require sandbox alignment
+  if (Ctx->getFlags().getUseSandboxing())
+    Func->markNodesForSandboxing();
 }
 
 template <class Machine> void TargetX86Base<Machine>::translateOm1() {
@@ -447,6 +450,10 @@
   if (Ctx->getFlags().shouldDoNopInsertion()) {
     Func->doNopInsertion();
   }
+
+  // Mark nodes that require sandbox alignment
+  if (Ctx->getFlags().getUseSandboxing())
+    Func->markNodesForSandboxing();
 }
 
 inline bool canRMW(const InstArithmetic *Arith) {
@@ -1109,14 +1116,9 @@
   // jmp *t
   // bundle_unlock
   // FakeUse <original_ret_operand>
-  const SizeT BundleSize = 1
-                           << Func->getAssembler<>()->getBundleAlignLog2Bytes();
   Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
   _pop(T_ecx);
-  _bundle_lock();
-  _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));
-  _jmp(T_ecx);
-  _bundle_unlock();
+  lowerIndirectJump(T_ecx);
   if (RI->getSrcSize()) {
     Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
     Context.insert(InstFakeUse::create(Func, RetValue));
@@ -3980,6 +3982,20 @@
   _mov(DestHi, Ctx->getConstantZero(IceType_i32));
 }
 
+template <class Machine>
+void TargetX86Base<Machine>::lowerIndirectJump(Variable *Target) {
+  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
+  if (NeedSandboxing) {
+    _bundle_lock();
+    const SizeT BundleSize =
+        1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
+    _and(Target, Ctx->getConstantInt32(~(BundleSize - 1)));
+  }
+  _jmp(Target);
+  if (NeedSandboxing)
+    _bundle_unlock();
+}
+
 inline bool isAdd(const Inst *Inst) {
   if (const InstArithmetic *Arith =
           llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
@@ -4525,19 +4541,19 @@
 template <class Machine>
 void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case,
                                               Operand *Comparison, bool DoneCmp,
-                                              CfgNode *DefaultLabel) {
+                                              CfgNode *DefaultTarget) {
   switch (Case.getKind()) {
   case CaseCluster::JumpTable: {
     typename Traits::Insts::Label *SkipJumpTable;
 
     Operand *RangeIndex =
         lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
-    if (DefaultLabel != nullptr) {
-      _br(Traits::Cond::Br_a, DefaultLabel);
-    } else {
+    if (DefaultTarget == nullptr) {
       // Skip over jump table logic if comparison not in range and no default
       SkipJumpTable = Traits::Insts::Label::create(Func, this);
       _br(Traits::Cond::Br_a, SkipJumpTable);
+    } else {
+      _br(Traits::Cond::Br_a, DefaultTarget);
     }
 
     InstJumpTable *JumpTable = Case.getJumpTable();
@@ -4554,38 +4570,44 @@
 
     constexpr RelocOffsetT RelocOffset = 0;
     constexpr bool SuppressMangling = true;
-    Constant *Base = Ctx->getConstantSym(RelocOffset, JumpTable->getName(Func),
-                                         SuppressMangling);
+    IceString MangledName = Ctx->mangleName(Func->getFunctionName());
+    Constant *Base = Ctx->getConstantSym(
+        RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()),
+        SuppressMangling);
     Constant *Offset = nullptr;
     uint16_t Shift = typeWidthInBytesLog2(getPointerType());
     // TODO(ascull): remove need for legalize by allowing null base in memop
-    auto *MemTarget = Traits::X86OperandMem::create(
+    auto *TargetInMemory = Traits::X86OperandMem::create(
         Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift);
     Variable *Target = nullptr;
-    _mov(Target, MemTarget);
-    _jmp(Target);
-    // TODO(ascull): sandboxing for indirect jump
+    _mov(Target, TargetInMemory);
+    lowerIndirectJump(Target);
 
-    if (DefaultLabel == nullptr)
+    if (DefaultTarget == nullptr)
       Context.insert(SkipJumpTable);
     return;
   }
   case CaseCluster::Range: {
-    if (Case.getHigh() == Case.getLow()) {
+    if (Case.isUnitRange()) {
       // Single item
-      Constant *Value = Ctx->getConstantInt32(Case.getLow());
-      if (!DoneCmp)
+      if (!DoneCmp) {
+        Constant *Value = Ctx->getConstantInt32(Case.getLow());
         _cmp(Comparison, Value);
-      _br(Traits::Cond::Br_e, Case.getLabel());
-      if (DefaultLabel != nullptr)
-        _br(DefaultLabel);
+      }
+      _br(Traits::Cond::Br_e, Case.getTarget());
+    } else if (DoneCmp && Case.isPairRange()) {
+      // Range of two items with first item already compared against
+      _br(Traits::Cond::Br_e, Case.getTarget());
+      Constant *Value = Ctx->getConstantInt32(Case.getHigh());
+      _cmp(Comparison, Value);
+      _br(Traits::Cond::Br_e, Case.getTarget());
     } else {
       // Range
       lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
-      _br(Traits::Cond::Br_be, Case.getLabel());
-      if (DefaultLabel != nullptr)
-        _br(DefaultLabel);
+      _br(Traits::Cond::Br_be, Case.getTarget());
     }
+    if (DefaultTarget != nullptr)
+      _br(DefaultTarget);
     return;
   }
   }
@@ -4593,57 +4615,10 @@
 
 template <class Machine>
 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
-  // Do it the old fashioned way unless asked for the advanced method
-  if (!Ctx->getFlags().getUseAdvancedSwitchLowering()) {
-    // This implements the most naive possible lowering.
-    // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
-    Operand *Src0 = Inst->getComparison();
-    SizeT NumCases = Inst->getNumCases();
-    if (Src0->getType() == IceType_i64) {
-      Src0 = legalize(Src0); // get Base/Index into physical registers
-      Operand *Src0Lo = loOperand(Src0);
-      Operand *Src0Hi = hiOperand(Src0);
-      if (NumCases >= 2) {
-        Src0Lo = legalizeToReg(Src0Lo);
-        Src0Hi = legalizeToReg(Src0Hi);
-      } else {
-        Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
-        Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
-      }
-      for (SizeT I = 0; I < NumCases; ++I) {
-        Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
-        Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
-        typename Traits::Insts::Label *Label =
-            Traits::Insts::Label::create(Func, this);
-        _cmp(Src0Lo, ValueLo);
-        _br(Traits::Cond::Br_ne, Label);
-        _cmp(Src0Hi, ValueHi);
-        _br(Traits::Cond::Br_e, Inst->getLabel(I));
-        Context.insert(Label);
-      }
-      _br(Inst->getLabelDefault());
-      return;
-    }
-    // OK, we'll be slightly less naive by forcing Src into a physical
-    // register if there are 2 or more uses.
-    if (NumCases >= 2)
-      Src0 = legalizeToReg(Src0);
-    else
-      Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
-    for (SizeT I = 0; I < NumCases; ++I) {
-      Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
-      _cmp(Src0, Value);
-      _br(Traits::Cond::Br_e, Inst->getLabel(I));
-    }
-
-    _br(Inst->getLabelDefault());
-    return;
-  }
-
   // Group cases together and navigate through them with a binary search
   CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
   Operand *Src0 = Inst->getComparison();
-  CfgNode *DefaultLabel = Inst->getLabelDefault();
+  CfgNode *DefaultTarget = Inst->getLabelDefault();
 
   assert(CaseClusters.size() != 0); // Should always be at least one
 
@@ -4681,7 +4656,7 @@
       Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
       Constant *Zero = Ctx->getConstantInt32(0);
       _cmp(Src0Hi, Zero);
-      _br(Traits::Cond::Br_ne, DefaultLabel);
+      _br(Traits::Cond::Br_ne, DefaultTarget);
       Src0 = Src0Lo;
     }
   }
@@ -4692,7 +4667,7 @@
     // Jump straight to default if needed. Currently a common case as jump
     // tables occur on their own.
     constexpr bool DoneCmp = false;
-    lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultLabel);
+    lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget);
     return;
   }
 
@@ -4727,27 +4702,44 @@
 
     case 1:
       lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp,
-                       SearchSpanStack.empty() ? nullptr : DefaultLabel);
+                       SearchSpanStack.empty() ? nullptr : DefaultTarget);
       DoneCmp = false;
       break;
 
-    case 2:
-      lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp);
+    case 2: {
+      const CaseCluster *CaseA = &CaseClusters[Span.Begin];
+      const CaseCluster *CaseB = &CaseClusters[Span.Begin + 1];
+
+      // Placing a range last may allow register clobbering during the range
+      // test. That means there is no need to clone the register. If it is a
+      // unit range the comparison may have already been done in the binary
+      // search (DoneCmp) and so it should be placed first. If this is a range
+      // of two items and the comparison with the low value has already been
+      // done, comparing with the other element is cheaper than a range test.
+      // If the low end of the range is zero then there is no subtraction and
+      // nothing to be gained.
+      if (!CaseA->isUnitRange() &&
+          !(CaseA->getLow() == 0 || (DoneCmp && CaseA->isPairRange()))) {
+        std::swap(CaseA, CaseB);
+        DoneCmp = false;
+      }
+
+      lowerCaseCluster(*CaseA, Comparison, DoneCmp);
       DoneCmp = false;
-      lowerCaseCluster(CaseClusters[Span.Begin + 1], Comparison, DoneCmp,
-                       SearchSpanStack.empty() ? nullptr : DefaultLabel);
-      break;
+      lowerCaseCluster(*CaseB, Comparison, DoneCmp,
+                       SearchSpanStack.empty() ? nullptr : DefaultTarget);
+    } break;
 
     default:
       // Pick the middle item and branch b or ae
       SizeT PivotIndex = Span.Begin + (Span.Size / 2);
       const CaseCluster &Pivot = CaseClusters[PivotIndex];
       Constant *Value = Ctx->getConstantInt32(Pivot.getLow());
-      // TODO(ascull): what if this jump is too big?
       typename Traits::Insts::Label *Label =
           Traits::Insts::Label::create(Func, this);
       _cmp(Comparison, Value);
-      _br(Traits::Cond::Br_b, Label);
+      // TODO(ascull): does it always have to be far?
+      _br(Traits::Cond::Br_b, Label, Traits::Insts::Br::Far);
       // Lower the left and (pivot+right) sides, falling through to the right
       SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label);
       SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr);
@@ -4756,7 +4748,7 @@
     }
   }
 
-  _br(DefaultLabel);
+  _br(DefaultTarget);
 }
 
 template <class Machine>