Introduction of improved switch lowering.

This includes the high-level analysis of switches, the x86 lowering,
the repointing of targets in jump tables, and the ASM emission of jump
tables.

The technique uses jump tables, range tests, and binary search, for a
worst case of O(lg n) that improves on the previous O(n) worst case of
a sequential search.
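
As a rough illustration, here is a standalone sketch (not part of this
CL; all names are invented) of the clustering and binary-search parts
of the technique. In the real implementation a jump table replaces a
sufficiently dense set of clusters:

  // sketch.cpp - illustration only; compiles with any C++11 compiler.
  #include <algorithm>
  #include <cstddef>
  #include <cstdint>
  #include <iostream>
  #include <vector>

  struct Cluster {
    uint64_t Low, High;
    int Target; // stand-in for a CfgNode *
  };

  // Merge the cases into maximal [Low, High] ranges per target.
  std::vector<Cluster> clusterize(std::vector<Cluster> Cases) {
    std::sort(Cases.begin(), Cases.end(),
              [](const Cluster &A, const Cluster &B) {
                return A.Low < B.Low;
              });
    std::vector<Cluster> Out;
    for (const Cluster &C : Cases) {
      if (!Out.empty() && Out.back().Target == C.Target &&
          Out.back().High + 1 == C.Low)
        Out.back().High = C.High; // extend the previous range
      else
        Out.push_back(C);
    }
    return Out;
  }

  // O(lg n) dispatch: binary search over the sorted, disjoint ranges.
  int dispatch(const std::vector<Cluster> &Clusters, uint64_t Value,
               int DefaultTarget) {
    std::size_t Lo = 0, Hi = Clusters.size();
    while (Lo < Hi) {
      std::size_t Mid = Lo + (Hi - Lo) / 2;
      if (Value < Clusters[Mid].Low)
        Hi = Mid;
      else if (Value > Clusters[Mid].High)
        Lo = Mid + 1;
      else
        return Clusters[Mid].Target; // Value falls inside this range
    }
    return DefaultTarget;
  }

  int main() {
    std::vector<Cluster> Clusters =
        clusterize({{0, 0, 1}, {1, 1, 1}, {10, 10, 2}, {11, 11, 2}});
    std::cout << dispatch(Clusters, 11, 0) << "\n"; // prints 2
  }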

Use is gated behind the --adv-switch flag, as the IAS emission still
needs to be implemented.

BUG=None
R=jvoung@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/1234803007.
diff --git a/Makefile.standalone b/Makefile.standalone
index 47afc8b..ccb202e 100644
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -196,6 +196,7 @@
 	IceOperand.cpp \
 	IceRegAlloc.cpp \
 	IceRNG.cpp \
+	IceSwitchLowering.cpp \
 	IceTargetLowering.cpp \
 	IceTargetLoweringARM32.cpp \
 	IceTargetLoweringMIPS32.cpp \
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 3f8b39b..60214c7 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -213,15 +213,9 @@
 // parameter is only used to make the new node's name unique when
 // there are multiple edges between the same pair of nodes.)  The new
 // node's instruction list is initialized to the empty list, with no
-// terminator instruction.  If there are multiple edges from Pred to
-// this node, only one edge is split, and the particular choice of
-// edge is undefined.  This could happen with a switch instruction, or
-// a conditional branch that weirdly has both branches to the same
-// place.  TODO(stichnot,kschimpf): Figure out whether this is legal
-// in the LLVM IR or the PNaCl bitcode, and if so, we need to
-// establish a strong relationship among the ordering of Pred's
-// out-edge list, this node's in-edge list, and the Phi instruction's
-// operand list.
+// terminator instruction. There must not be multiple edges from Pred
+// to this node, so every Inst::getTerminatorEdges implementation must
+// return a list without duplicates.
 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
   CfgNode *NewNode = Func->makeNode();
   if (BuildDefs::dump())
@@ -232,32 +226,31 @@
   NewNode->setNeedsPlacement(true);
   // Repoint Pred's out-edge.
   bool Found = false;
-  for (auto I = Pred->OutEdges.begin(), E = Pred->OutEdges.end();
-       !Found && I != E; ++I) {
-    if (*I == this) {
-      *I = NewNode;
+  for (CfgNode *&I : Pred->OutEdges) {
+    if (I == this) {
+      I = NewNode;
       NewNode->InEdges.push_back(Pred);
       Found = true;
+      break;
     }
   }
   assert(Found);
   // Repoint this node's in-edge.
   Found = false;
-  for (auto I = InEdges.begin(), E = InEdges.end(); !Found && I != E; ++I) {
-    if (*I == Pred) {
-      *I = NewNode;
+  for (CfgNode *&I : InEdges) {
+    if (I == Pred) {
+      I = NewNode;
       NewNode->OutEdges.push_back(this);
       Found = true;
+      break;
     }
   }
   assert(Found);
-  // Repoint a suitable branch instruction's target and return.
+  // Repoint all suitable branch instructions' targets and return.
   Found = false;
-  for (Inst &I : reverse_range(Pred->getInsts())) {
-    if (!I.isDeleted() && I.repointEdge(this, NewNode))
-      return NewNode;
-  }
-  // This should be unreachable, so the assert will fail.
+  for (Inst &I : Pred->getInsts())
+    if (!I.isDeleted() && I.repointEdges(this, NewNode))
+      Found = true;
   assert(Found);
   return NewNode;
 }
@@ -752,7 +745,7 @@
       }
       for (Inst &I : Pred->getInsts()) {
         if (!I.isDeleted())
-          I.repointEdge(this, OutEdges.front());
+          I.repointEdges(this, OutEdges.front());
       }
     }
   }
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp
index 2d2e641..746cb3e 100644
--- a/src/IceClFlags.cpp
+++ b/src/IceClFlags.cpp
@@ -176,6 +176,10 @@
     TranslateOnly("translate-only",
                   cl::desc("Translate only the given function"), cl::init(""));
 
+cl::opt<bool>
+    UseAdvancedSwitchLowering("adv-switch",
+                              cl::desc("Use advanced switch lowering"));
+
 cl::opt<bool> UseSandboxing("sandbox", cl::desc("Use sandboxing"));
 
 cl::opt<std::string> VerboseFocusOn(
@@ -341,6 +345,7 @@
   OutFlags.SkipUnimplemented = false;
   OutFlags.SubzeroTimingEnabled = false;
   OutFlags.TimeEachFunction = false;
+  OutFlags.UseAdvancedSwitchLowering = false;
   OutFlags.UseSandboxing = false;
   // Enum and integer fields.
   OutFlags.Opt = Opt_m1;
@@ -410,6 +415,7 @@
   OutFlags.setTimeEachFunction(::TimeEachFunction);
   OutFlags.setTimingFocusOn(::TimingFocusOn);
   OutFlags.setTranslateOnly(::TranslateOnly);
+  OutFlags.setUseAdvancedSwitchLowering(::UseAdvancedSwitchLowering);
   OutFlags.setUseSandboxing(::UseSandboxing);
   OutFlags.setVerboseFocusOn(::VerboseFocusOn);
   OutFlags.setOutFileType(::OutFileType);
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index 549bf16..97420ea 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -103,6 +103,13 @@
   }
   void setTimeEachFunction(bool NewValue) { TimeEachFunction = NewValue; }
 
+  bool getUseAdvancedSwitchLowering() const {
+    return UseAdvancedSwitchLowering;
+  }
+  void setUseAdvancedSwitchLowering(bool NewValue) {
+    UseAdvancedSwitchLowering = NewValue;
+  }
+
   bool getUseSandboxing() const { return UseSandboxing; }
   void setUseSandboxing(bool NewValue) { UseSandboxing = NewValue; }
 
@@ -235,6 +242,7 @@
   bool SkipUnimplemented;
   bool SubzeroTimingEnabled;
   bool TimeEachFunction;
+  bool UseAdvancedSwitchLowering;
   bool UseSandboxing;
 
   OptLevel Opt;
diff --git a/src/IceELFObjectWriter.h b/src/IceELFObjectWriter.h
index 0562c9b..c2d7157 100644
--- a/src/IceELFObjectWriter.h
+++ b/src/IceELFObjectWriter.h
@@ -146,7 +146,8 @@
   void assignRelLinkNum(SizeT SymTabNumber, RelSectionList &RelSections);
 
   /// Helper function for writeDataSection. Writes a data section of type
-  /// SectionType, given the global variables Vars belonging to that SectionType.
+  /// SectionType, given the global variables Vars belonging to that
+  /// SectionType.
   void writeDataOfType(SectionType SectionType,
                        const VariableDeclarationList &Vars,
                        FixupKind RelocationKind,
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index ecfb1b6..7f49835 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -19,6 +19,7 @@
 #include "IceCfgNode.h"
 #include "IceLiveness.h"
 #include "IceOperand.h"
+#include "IceTargetLowering.h"
 
 namespace Ice {
 
@@ -295,15 +296,17 @@
   return OutEdges;
 }
 
-bool InstBr::repointEdge(CfgNode *OldNode, CfgNode *NewNode) {
+bool InstBr::repointEdges(CfgNode *OldNode, CfgNode *NewNode) {
+  bool Found = false;
   if (TargetFalse == OldNode) {
     TargetFalse = NewNode;
-    return true;
-  } else if (TargetTrue == OldNode) {
-    TargetTrue = NewNode;
-    return true;
+    Found = true;
   }
-  return false;
+  if (TargetTrue == OldNode) {
+    TargetTrue = NewNode;
+    Found = true;
+  }
+  return Found;
 }
 
 InstCast::InstCast(Cfg *Func, OpKind CastKind, Variable *Dest, Operand *Source)
@@ -461,21 +464,28 @@
   for (SizeT I = 0; I < NumCases; ++I) {
     OutEdges.push_back(Labels[I]);
   }
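+  // Multiple cases may share a target, which would produce duplicate edges;
+  // sort by node index and remove duplicates so the returned list satisfies
+  // the no-duplicates requirement of getTerminatorEdges.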
+  std::sort(OutEdges.begin(), OutEdges.end(),
+            [](const CfgNode *x, const CfgNode *y) {
+              return x->getIndex() < y->getIndex();
+            });
+  auto Last = std::unique(OutEdges.begin(), OutEdges.end());
+  OutEdges.erase(Last, OutEdges.end());
   return OutEdges;
 }
 
-bool InstSwitch::repointEdge(CfgNode *OldNode, CfgNode *NewNode) {
+bool InstSwitch::repointEdges(CfgNode *OldNode, CfgNode *NewNode) {
+  bool Found = false;
   if (LabelDefault == OldNode) {
     LabelDefault = NewNode;
-    return true;
+    Found = true;
   }
   for (SizeT I = 0; I < NumCases; ++I) {
     if (Labels[I] == OldNode) {
       Labels[I] = NewNode;
-      return true;
+      Found = true;
     }
   }
-  return false;
+  return Found;
 }
 
 InstUnreachable::InstUnreachable(Cfg *Func)
@@ -504,6 +514,31 @@
 InstFakeKill::InstFakeKill(Cfg *Func, const Inst *Linked)
     : InstHighLevel(Func, Inst::FakeKill, 0, nullptr), Linked(Linked) {}
 
+InstJumpTable::InstJumpTable(Cfg *Func, SizeT NumTargets, CfgNode *Default)
+    : InstHighLevel(Func, Inst::JumpTable, 1, nullptr),
+      LabelNumber(Func->getTarget()->makeNextLabelNumber()),
+      NumTargets(NumTargets) {
+  Targets = Func->allocateArrayOf<CfgNode *>(NumTargets);
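+  // Initialize every entry to the default target; the real targets are
+  // filled in afterwards via addTarget().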
+  for (SizeT I = 0; I < NumTargets; ++I)
+    Targets[I] = Default;
+}
+
+bool InstJumpTable::repointEdges(CfgNode *OldNode, CfgNode *NewNode) {
+  bool Found = false;
+  for (SizeT I = 0; I < NumTargets; ++I) {
+    if (Targets[I] == OldNode) {
+      Targets[I] = NewNode;
+      Found = true;
+    }
+  }
+  return Found;
+}
+
+IceString InstJumpTable::getName(const Cfg *Func) const {
+  return ".L" + Func->getFunctionName() + "$jumptable$__" +
+         std::to_string(LabelNumber);
+}
+
 Type InstCall::getReturnType() const {
   if (Dest == nullptr)
     return IceType_void;
@@ -917,6 +952,39 @@
   Str << "kill.pseudo scratch_regs";
 }
 
+void InstJumpTable::emit(const Cfg *Func) const {
+  // TODO(ascull): Should this be a target-specific lowering (with access
+  // built in?), keeping InstJumpTable high-level like br? Or should it follow
+  // the same path as emitIAS, i.e. put the table in the global context and
+  // produce this code later?
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Str = Func->getContext()->getStrEmit();
+  // TODO(ascull): soft-code the pointer size of 4
+  // TODO(ascull): is .long portable?
+  Str << "\n\t.section\t.rodata." << Func->getFunctionName()
+      << "$jumptable,\"a\",@progbits\n"
+      << "\t.align 4\n" << getName(Func) << ":";
+  for (SizeT I = 0; I < NumTargets; ++I)
+    Str << "\n\t.long\t" << Targets[I]->getAsmName();
+  Str << "\n\n\t.text";
+}
+
+void InstJumpTable::emitIAS(const Cfg *Func) const {
+  // TODO(ascull): put jump table in the global context for emission later
+  (void)Func;
+}
+
+void InstJumpTable::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Str = Func->getContext()->getStrDump();
+  Str << "jump table [";
+  for (SizeT I = 0; I < NumTargets; ++I)
+    Str << "\n    " << Targets[I]->getName();
+  Str << "\n  ]";
+}
+
 void InstTarget::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
diff --git a/src/IceInst.h b/src/IceInst.h
index cfa6dd3..48b18f4 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -67,6 +67,7 @@
     FakeDef,      // not part of LLVM/PNaCl bitcode
     FakeUse,      // not part of LLVM/PNaCl bitcode
     FakeKill,     // not part of LLVM/PNaCl bitcode
+    JumpTable,    // not part of LLVM/PNaCl bitcode
     Target        // target-specific low-level ICE
                   // Anything >= Target is an InstTarget subclass.
                   // Note that the value-spaces are shared across targets.
@@ -108,6 +109,7 @@
 
   /// Returns a list of out-edges corresponding to a terminator
   /// instruction, which is the last instruction of the block.
+  /// The list must not contain duplicates.
   virtual NodeList getTerminatorEdges() const {
     // All valid terminator instructions override this method.  For
     // the default implementation, we assert in case some CfgNode
@@ -119,9 +121,8 @@
   virtual bool isUnconditionalBranch() const { return false; }
   /// If the instruction is a branch-type instruction with OldNode as a
   /// target, repoint it to NewNode and return true, otherwise return
-  /// false.  Only repoint one instance, even if the instruction has
-  /// multiple instances of OldNode as a target.
-  virtual bool repointEdge(CfgNode *OldNode, CfgNode *NewNode) {
+  /// false. Repoint all instances of OldNode, not just the first.
+  virtual bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) {
     (void)OldNode;
     (void)NewNode;
     return false;
@@ -352,7 +353,7 @@
   }
   NodeList getTerminatorEdges() const override;
   bool isUnconditionalBranch() const override { return isUnconditional(); }
-  bool repointEdge(CfgNode *OldNode, CfgNode *NewNode) override;
+  bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return Inst->getKind() == Br; }
 
@@ -727,7 +728,7 @@
   }
   void addBranch(SizeT CaseIndex, uint64_t Value, CfgNode *Label);
   NodeList getTerminatorEdges() const override;
-  bool repointEdge(CfgNode *OldNode, CfgNode *NewNode) override;
+  bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return Inst->getKind() == Switch; }
 
@@ -899,6 +900,43 @@
   const Inst *Linked;
 };
 
+/// JumpTable instruction. This represents a jump table that will be
+/// stored in the .rodata section. This is used to track and repoint
+/// the target CfgNodes which may change, for example due to
+/// splitting for phi lowering.
+class InstJumpTable : public InstHighLevel {
+  InstJumpTable() = delete;
+  InstJumpTable(const InstJumpTable &) = delete;
+  InstJumpTable &operator=(const InstJumpTable &) = delete;
+
+public:
+  static InstJumpTable *create(Cfg *Func, SizeT NumTargets, CfgNode *Default) {
+    return new (Func->allocate<InstJumpTable>())
+        InstJumpTable(Func, NumTargets, Default);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void addTarget(SizeT TargetIndex, CfgNode *Target) {
+    assert(TargetIndex < NumTargets);
+    Targets[TargetIndex] = Target;
+  }
+  bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override;
+  IceString getName(const Cfg *Func) const;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) { return Inst->getKind() == JumpTable; }
+
+private:
+  InstJumpTable(Cfg *Func, SizeT NumTargets, CfgNode *Default);
+  void destroy(Cfg *Func) override {
+    Func->deallocateArrayOf<CfgNode *>(Targets);
+    Inst::destroy(Func);
+  }
+
+  const SizeT LabelNumber;
+  const SizeT NumTargets;
+  CfgNode **Targets;
+};
+
 /// The Target instruction is the base class for all target-specific
 /// instructions.
 class InstTarget : public Inst {
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index e95f6b1..ddc94a7 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -277,15 +277,17 @@
   return false;
 }
 
-bool InstARM32Br::repointEdge(CfgNode *OldNode, CfgNode *NewNode) {
+bool InstARM32Br::repointEdges(CfgNode *OldNode, CfgNode *NewNode) {
+  bool Found = false;
   if (TargetFalse == OldNode) {
     TargetFalse = NewNode;
-    return true;
-  } else if (TargetTrue == OldNode) {
-    TargetTrue = NewNode;
-    return true;
+    Found = true;
   }
-  return false;
+  if (TargetTrue == OldNode) {
+    TargetTrue = NewNode;
+    Found = true;
+  }
+  return Found;
 }
 
 InstARM32Call::InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h
index 8a7e1da..030c194 100644
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -742,7 +742,7 @@
   bool isUnconditionalBranch() const override {
     return getPredicate() == CondARM32::AL;
   }
-  bool repointEdge(CfgNode *OldNode, CfgNode *NewNode) override;
+  bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override;
   void emit(const Cfg *Func) const override;
   void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 7275e44..a4830ef 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -349,7 +349,7 @@
   bool isUnconditionalBranch() const override {
     return !Label && Condition == InstX86Base<Machine>::Traits::Cond::Br_None;
   }
-  bool repointEdge(CfgNode *OldNode, CfgNode *NewNode) override;
+  bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override;
   void emit(const Cfg *Func) const override;
   void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 2d9ee0f..f80073e 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -152,15 +152,17 @@
 }
 
 template <class Machine>
-bool InstX86Br<Machine>::repointEdge(CfgNode *OldNode, CfgNode *NewNode) {
+bool InstX86Br<Machine>::repointEdges(CfgNode *OldNode, CfgNode *NewNode) {
+  bool Found = false;
   if (TargetFalse == OldNode) {
     TargetFalse = NewNode;
-    return true;
-  } else if (TargetTrue == OldNode) {
-    TargetTrue = NewNode;
-    return true;
+    Found = true;
   }
-  return false;
+  if (TargetTrue == OldNode) {
+    TargetTrue = NewNode;
+    Found = true;
+  }
+  return Found;
 }
 
 template <class Machine>
diff --git a/src/IceRegistersX8632.h b/src/IceRegistersX8632.h
index f0baec4..da920e2 100644
--- a/src/IceRegistersX8632.h
+++ b/src/IceRegistersX8632.h
@@ -68,8 +68,8 @@
         Encoded_Not_ByteReg = -1
   };
 
-  /// An enum of X87 Stack Registers. The enum value does match the encoding used
-  /// to binary encode register operands in instructions.
+  /// An enum of X87 Stack Registers. The enum value does match the encoding
+  /// used to binary encode register operands in instructions.
   enum X87STRegister {
 #define X(val, encode, name) Encoded_##val encode,
     X87ST_REGX8632_TABLE
diff --git a/src/IceSwitchLowering.cpp b/src/IceSwitchLowering.cpp
new file mode 100644
index 0000000..06894e6
--- /dev/null
+++ b/src/IceSwitchLowering.cpp
@@ -0,0 +1,96 @@
+//===- subzero/src/IceSwitchLowering.cpp - Switch lowering ----------------===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements platform independent analysis of switch cases to
+/// improve the generated code.
+///
+//===----------------------------------------------------------------------===//
+#include "IceSwitchLowering.h"
+
+#include "IceTargetLowering.h"
+
+#include <algorithm>
+
+namespace Ice {
+
+CaseClusterArray CaseCluster::clusterizeSwitch(Cfg *Func,
+                                               const InstSwitch *Inst) {
+  CaseClusterArray CaseClusters;
+
+  // Load the cases
+  SizeT NumCases = Inst->getNumCases();
+  CaseClusters.reserve(NumCases);
+  for (SizeT I = 0; I < NumCases; ++I)
+    CaseClusters.emplace_back(Inst->getValue(I), Inst->getLabel(I));
+
+  // Sort the cases (each cluster is still a unitary range here, so comparing
+  // High against Low orders the clusters by value)
+  std::sort(CaseClusters.begin(), CaseClusters.end(),
+            [](const CaseCluster &x, const CaseCluster &y) {
+              return x.High < y.Low;
+            });
+
+  // Merge adjacent case ranges
+  auto Active = CaseClusters.begin();
+  std::for_each(Active + 1, CaseClusters.end(),
+                [&Active](const CaseCluster &x) {
+                  if (!Active->tryAppend(x))
+                    *(++Active) = x;
+                });
+  CaseClusters.erase(Active + 1, CaseClusters.end());
+
+  // TODO(ascull): Merge in a cycle, i.e. -1 (= UINTXX_MAX) to 0. This depends
+  // on the type for correct wraparound behavior.
+
+  // A small number of cases is more efficient without a jump table
+  if (CaseClusters.size() < Func->getTarget()->getMinJumpTableSize())
+    return CaseClusters;
+
+  // Test for a single jump table. This can be done in constant time, whereas
+  // finding the best set of jump tables would be quadratic and probably too
+  // slow. If jump tables were included in the search tree we'd first have to
+  // traverse to them. Ideally we would have an unbalanced tree biased towards
+  // frequently executed code, but we can't do this well without profiling
+  // data. So this single jump table is a good starting point: it can be
+  // reached quickly without figuring out how to unbalance the tree.
+  uint64_t MaxValue = CaseClusters.back().High;
+  uint64_t MinValue = CaseClusters.front().Low;
+  // Don't add 1 yet, to avoid overflow when the range is (UINT64_MAX - 0)
+  uint64_t TotalRange = MaxValue - MinValue;
+
+  // Might be too sparse for the jump table
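+  // (e.g. the cases {91, 92, 93, 96, 97, 98, 99} give NumCases = 7 and
+  // TotalRange = 8, so 14 <= 8 fails and the jump table is used)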
+  if (NumCases * 2 <= TotalRange)
+    return CaseClusters;
+  // Unlikely; it would mean the size of the jump table is not representable.
+  if (TotalRange == UINT64_MAX)
+    return CaseClusters;
+  ++TotalRange;
+
+  // Replace everything with a jump table
+  InstJumpTable *JumpTable =
+      InstJumpTable::create(Func, TotalRange, Inst->getLabelDefault());
+  for (const CaseCluster &Case : CaseClusters)
+    for (uint64_t I = Case.Low; I <= Case.High; ++I)
+      JumpTable->addTarget(I - MinValue, Case.Label);
+
+  CaseClusters.clear();
+  CaseClusters.emplace_back(MinValue, MaxValue, JumpTable);
+
+  return CaseClusters;
+}
+
+bool CaseCluster::tryAppend(const CaseCluster &New) {
+  // Can only append ranges with the same target and are adjacent
+  bool CanAppend = this->Label == New.Label && this->High + 1 == New.Low;
+  if (CanAppend)
+    this->High = New.High;
+  return CanAppend;
+}
+
+} // end of namespace Ice
diff --git a/src/IceSwitchLowering.h b/src/IceSwitchLowering.h
new file mode 100644
index 0000000..5dfcec3
--- /dev/null
+++ b/src/IceSwitchLowering.h
@@ -0,0 +1,78 @@
+//===- subzero/src/IceSwitchLowering.h - Switch lowering --------*- C++ -*-===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains helpers for switch lowering.
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICESWITCHLOWERING_H
+#define SUBZERO_SRC_ICESWITCHLOWERING_H
+
+#include "IceCfgNode.h"
+#include "IceInst.h"
+
+namespace Ice {
+
+class CaseCluster;
+
+typedef std::vector<CaseCluster, CfgLocalAllocator<CaseCluster>>
+    CaseClusterArray;
+
+/// A cluster of cases that can be tested by a common method during switch
+/// lowering.
+class CaseCluster {
+  CaseCluster() = delete;
+
+public:
+  enum CaseClusterKind {
+    Range,     /// Numerically adjacent case values with the same target.
+    JumpTable, /// Different targets and possibly sparse.
+  };
+
+  CaseCluster(const CaseCluster &) = default;
+  CaseCluster &operator=(const CaseCluster &) = default;
+
+  /// Create a cluster of a single case represented by a unitary range.
+  CaseCluster(uint64_t Value, CfgNode *Label)
+      : Kind(Range), Low(Value), High(Value), Label(Label) {}
+  /// Create a case consisting of a jump table.
+  CaseCluster(uint64_t Low, uint64_t High, InstJumpTable *JT)
+      : Kind(JumpTable), Low(Low), High(High), JT(JT) {}
+
+  CaseClusterKind getKind() const { return Kind; }
+  uint64_t getLow() const { return Low; }
+  uint64_t getHigh() const { return High; }
+  CfgNode *getLabel() const {
+    assert(Kind == Range);
+    return Label;
+  }
+  InstJumpTable *getJumpTable() const {
+    assert(Kind == JumpTable);
+    return JT;
+  }
+
+  /// Discover cases which can be clustered together and return the clusters
+  /// ordered by case value.
+  static CaseClusterArray clusterizeSwitch(Cfg *Func, const InstSwitch *Inst);
+
+private:
+  CaseClusterKind Kind;
+  uint64_t Low;
+  uint64_t High;
+  union {
+    CfgNode *Label;    /// Target for a range.
+    InstJumpTable *JT; /// Jump table targets.
+  };
+
+  /// Try to append a cluster, returning whether it was successful.
+  bool tryAppend(const CaseCluster &New);
+};
+
+} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICESWITCHLOWERING_H
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index a5b52ce..e87bf93 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -208,6 +208,10 @@
     StackAdjustment = SnapshotStackAdjustment;
   }
 
+  /// Get the minimum number of clusters required for a jump table to be
+  /// considered.
+  virtual SizeT getMinJumpTableSize() const = 0;
+
   virtual void emitVariable(const Variable *Var) const = 0;
 
   void emitWithoutPrefix(const ConstantRelocatable *CR) const;
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 87faf01..95e4691 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -78,6 +78,9 @@
     return (typeWidthInBytes(Ty) + 3) & ~3;
   }
 
+  // TODO(ascull): what size is best for ARM?
+  SizeT getMinJumpTableSize() const override { return 3; }
+
   void emitVariable(const Variable *Var) const override;
 
   const char *getConstantPrefix() const final { return "#"; }
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index a72c8db..e581ffa 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -53,6 +53,10 @@
     // i8, and i16 are rounded up to 4 bytes.
     return (typeWidthInBytes(Ty) + 3) & ~3;
   }
+
+  // TODO(ascull): what is the best size for MIPS?
+  SizeT getMinJumpTableSize() const override { return 3; }
+
   void emitVariable(const Variable *Var) const override;
 
   const char *getConstantPrefix() const final { return ""; }
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 0d9572a..9e857a8 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -335,7 +335,7 @@
       _num
 };
 // Define a set of constants based on high-level table entries.
-#define X(tag, size, align, elts, elty, str)                                   \
+#define X(tag, sizeLog2, align, elts, elty, str)                               \
   static const int _table1_##tag = tag;
 ICETYPE_TABLE
 #undef X
@@ -349,7 +349,7 @@
 #undef X
 // Repeat the static asserts with respect to the high-level table
 // entries in case the high-level table has extra entries.
-#define X(tag, size, align, elts, elty, str)                                   \
+#define X(tag, sizeLog2, align, elts, elty, str)                               \
   static_assert(_table1_##tag == _table2_##tag,                                \
                 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
 ICETYPE_TABLE
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 7963b01..eef9536 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -19,6 +19,7 @@
 
 #include "IceDefs.h"
 #include "IceInst.h"
+#include "IceSwitchLowering.h"
 #include "IceTargetLowering.h"
 
 #include <type_traits>
@@ -130,6 +131,8 @@
     return (typeWidthInBytes(Ty) + 3) & ~3;
   }
 
+  SizeT getMinJumpTableSize() const override { return 4; }
+
   void emitVariable(const Variable *Var) const override;
 
   const char *getConstantPrefix() const final { return "$"; }
@@ -204,6 +207,19 @@
   void lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, Operand *FirstVal,
                        Operand *SecondVal);
 
+  /// Check whether the comparison is in [Min,Max]. The flags register will
+  /// be modified as follows:
+  ///   - below-or-equal, if in range
+  ///   - above, if not in range
+  /// The index into the range is returned.
+  Operand *lowerCmpRange(Operand *Comparison, uint64_t Min, uint64_t Max);
+  /// Lowering of a cluster of switch cases. If the case is not matched,
+  /// control will pass to the default label provided. If the default label is
+  /// nullptr then control will fall through to the next instruction. DoneCmp
+  /// should be true if the flags already hold the result of comparing
+  /// Comparison with the case value.
+  void lowerCaseCluster(const CaseCluster &Case, Operand *Src0, bool DoneCmp,
+                        CfgNode *DefaultLabel = nullptr);
+
   typedef void (TargetX86Base::*LowerBinOp)(Variable *, Operand *);
   void expandAtomicRMWAsCmpxchg(LowerBinOp op_lo, LowerBinOp op_hi,
                                 Variable *Dest, Operand *Ptr, Operand *Val);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index cf786f4..cd1c90d 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -29,6 +29,8 @@
 #include "IceUtils.h"
 #include "llvm/Support/MathExtras.h"
 
+#include <stack>
+
 namespace Ice {
 namespace X86Internal {
 
@@ -4494,49 +4496,260 @@
 }
 
 template <class Machine>
-void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
-  // This implements the most naive possible lowering.
-  // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
-  Operand *Src0 = Inst->getComparison();
-  SizeT NumCases = Inst->getNumCases();
-  if (Src0->getType() == IceType_i64) {
-    Src0 = legalizeUndef(Src0);
-    Operand *Src0Lo = loOperand(Src0);
-    Operand *Src0Hi = hiOperand(Src0);
-    if (NumCases >= 2) {
-      Src0Lo = legalizeToVar(Src0Lo);
-      Src0Hi = legalizeToVar(Src0Hi);
+Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison,
+                                               uint64_t Min, uint64_t Max) {
+  // TODO(ascull): 64-bit values should not reach here, but only because the
+  // 64-bit lowering is not implemented yet; this should be able to handle it.
+  assert(Comparison->getType() != IceType_i64);
+  // Subtracting 0 is a no-op, so skip it
+  if (Min != 0) {
+    // Avoid clobbering the comparison by copying it
+    Variable *T = nullptr;
+    _mov(T, Comparison);
+    _sub(T, Ctx->getConstantInt32(Min));
+    Comparison = T;
+  }
+
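+  // After the subtraction, a value below Min wraps around to a large unsigned
+  // number, so a single unsigned compare against Max - Min tests both bounds.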
+  _cmp(Comparison, Ctx->getConstantInt32(Max - Min));
+
+  return Comparison;
+}
+
+template <class Machine>
+void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case,
+                                              Operand *Comparison, bool DoneCmp,
+                                              CfgNode *DefaultLabel) {
+  switch (Case.getKind()) {
+  case CaseCluster::JumpTable: {
+    typename Traits::Insts::Label *SkipJumpTable;
+
+    Operand *RangeIndex =
+        lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
+    if (DefaultLabel != nullptr) {
+      _br(Traits::Cond::Br_a, DefaultLabel);
     } else {
-      Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
-      Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
+      // Skip over the jump table logic if the comparison is not in range and
+      // there is no default
+      SkipJumpTable = Traits::Insts::Label::create(Func, this);
+      _br(Traits::Cond::Br_a, SkipJumpTable);
     }
+
+    InstJumpTable *JumpTable = Case.getJumpTable();
+    Context.insert(JumpTable);
+
+    // Make sure the index is a register of the same width as the base
+    Variable *Index;
+    if (RangeIndex->getType() != getPointerType()) {
+      Index = makeReg(getPointerType());
+      _movzx(Index, RangeIndex);
+    } else {
+      Index = legalizeToVar(RangeIndex);
+    }
+
+    constexpr RelocOffsetT RelocOffset = 0;
+    constexpr bool SuppressMangling = true;
+    Constant *Base = Ctx->getConstantSym(RelocOffset, JumpTable->getName(Func),
+                                         SuppressMangling);
+    Constant *Offset = nullptr;
+    uint16_t Shift = typeWidthInBytesLog2(getPointerType());
+    // TODO(ascull): remove need for legalize by allowing null base in memop
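+    // The entry to load is at Base + (Index << Shift), i.e. a scaled index
+    // into the table of pointer-sized (4-byte on x86-32) entries emitted in
+    // the .rodata section.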
+    auto *MemTarget = Traits::X86OperandMem::create(
+        Func, getPointerType(), legalizeToVar(Base), Offset, Index, Shift);
+    Variable *Target = nullptr;
+    _mov(Target, MemTarget);
+    _jmp(Target);
+    // TODO(ascull): sandboxing for indirect jump
+
+    if (DefaultLabel == nullptr)
+      Context.insert(SkipJumpTable);
+    return;
+  }
+  case CaseCluster::Range: {
+    if (Case.getHigh() == Case.getLow()) {
+      // Single item
+      Constant *Value = Ctx->getConstantInt32(Case.getLow());
+      if (!DoneCmp)
+        _cmp(Comparison, Value);
+      _br(Traits::Cond::Br_e, Case.getLabel());
+      if (DefaultLabel != nullptr)
+        _br(DefaultLabel);
+    } else {
+      // Range
+      lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
+      _br(Traits::Cond::Br_be, Case.getLabel());
+      if (DefaultLabel != nullptr)
+        _br(DefaultLabel);
+    }
+    return;
+  }
+  }
+}
+
+template <class Machine>
+void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
+  // Do it the old-fashioned way unless the advanced method was requested
+  if (!Ctx->getFlags().getUseAdvancedSwitchLowering()) {
+    // This implements the most naive possible lowering.
+    // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
+    Operand *Src0 = Inst->getComparison();
+    SizeT NumCases = Inst->getNumCases();
+    if (Src0->getType() == IceType_i64) {
+      Src0 = legalize(Src0); // get Base/Index into physical registers
+      Operand *Src0Lo = loOperand(Src0);
+      Operand *Src0Hi = hiOperand(Src0);
+      if (NumCases >= 2) {
+        Src0Lo = legalizeToVar(Src0Lo);
+        Src0Hi = legalizeToVar(Src0Hi);
+      } else {
+        Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
+        Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
+      }
+      for (SizeT I = 0; I < NumCases; ++I) {
+        Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
+        Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
+        typename Traits::Insts::Label *Label =
+            Traits::Insts::Label::create(Func, this);
+        _cmp(Src0Lo, ValueLo);
+        _br(Traits::Cond::Br_ne, Label);
+        _cmp(Src0Hi, ValueHi);
+        _br(Traits::Cond::Br_e, Inst->getLabel(I));
+        Context.insert(Label);
+      }
+      _br(Inst->getLabelDefault());
+      return;
+    }
+    // OK, we'll be slightly less naive by forcing Src into a physical
+    // register if there are 2 or more uses.
+    if (NumCases >= 2)
+      Src0 = legalizeToVar(Src0);
+    else
+      Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
     for (SizeT I = 0; I < NumCases; ++I) {
-      Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
-      Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
-      typename Traits::Insts::Label *Label =
-          Traits::Insts::Label::create(Func, this);
-      _cmp(Src0Lo, ValueLo);
-      _br(Traits::Cond::Br_ne, Label);
-      _cmp(Src0Hi, ValueHi);
+      Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
+      _cmp(Src0, Value);
       _br(Traits::Cond::Br_e, Inst->getLabel(I));
-      Context.insert(Label);
     }
+
     _br(Inst->getLabelDefault());
     return;
   }
-  // OK, we'll be slightly less naive by forcing Src into a physical
-  // register if there are 2 or more uses.
-  if (NumCases >= 2)
-    Src0 = legalizeToVar(Src0);
-  else
-    Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
-  for (SizeT I = 0; I < NumCases; ++I) {
-    Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
-    _cmp(Src0, Value);
-    _br(Traits::Cond::Br_e, Inst->getLabel(I));
+
+  // Group cases together and navigate through them with a binary search
+  CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
+  Operand *Src0 = Inst->getComparison();
+  CfgNode *DefaultLabel = Inst->getLabelDefault();
+
+  assert(CaseClusters.size() != 0); // Should always be at least one
+
+  if (Src0->getType() == IceType_i64) {
+    Src0 = legalize(Src0); // get Base/Index into physical registers
+    Operand *Src0Lo = loOperand(Src0);
+    Operand *Src0Hi = hiOperand(Src0);
+    if (CaseClusters.back().getHigh() > UINT32_MAX) {
+      // TODO(ascull): handle 64-bit case properly (currently naive version)
+      // This might be handled by a higher level lowering of switches.
+      SizeT NumCases = Inst->getNumCases();
+      if (NumCases >= 2) {
+        Src0Lo = legalizeToVar(Src0Lo);
+        Src0Hi = legalizeToVar(Src0Hi);
+      } else {
+        Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
+        Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
+      }
+      for (SizeT I = 0; I < NumCases; ++I) {
+        Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
+        Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
+        typename Traits::Insts::Label *Label =
+            Traits::Insts::Label::create(Func, this);
+        _cmp(Src0Lo, ValueLo);
+        _br(Traits::Cond::Br_ne, Label);
+        _cmp(Src0Hi, ValueHi);
+        _br(Traits::Cond::Br_e, Inst->getLabel(I));
+        Context.insert(Label);
+      }
+      _br(Inst->getLabelDefault());
+      return;
+    } else {
+      // All the values are 32-bit so just check the operand is too and then
+      // fall through to the 32-bit implementation. This is a common case.
+      Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
+      Constant *Zero = Ctx->getConstantInt32(0);
+      _cmp(Src0Hi, Zero);
+      _br(Traits::Cond::Br_ne, DefaultLabel);
+      Src0 = Src0Lo;
+    }
   }
 
-  _br(Inst->getLabelDefault());
+  // 32-bit lowering
+
+  if (CaseClusters.size() == 1) {
+    // Jump straight to default if needed. Currently a common case as jump
+    // tables occur on their own.
+    constexpr bool DoneCmp = false;
+    lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultLabel);
+    return;
+  }
+
+  // The comparison is used multiple times, so get it into a register early
+  Variable *Comparison = legalizeToVar(Src0);
+
+  // A span describes a contiguous range of clusters still to be searched.
+  struct SearchSpan {
+    SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label)
+        : Begin(Begin), Size(Size), Label(Label) {}
+
+    SizeT Begin;
+    SizeT Size;
+    typename Traits::Insts::Label *Label;
+  };
+  std::stack<SearchSpan, std::deque<SearchSpan, CfgLocalAllocator<SearchSpan>>>
+      SearchSpanStack;
+  SearchSpanStack.emplace(0, CaseClusters.size(), nullptr);
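+  // DoneCmp records whether the flags already hold the result of comparing
+  // Comparison with the low value of the next cluster, allowing the cluster
+  // lowering to skip its own cmp.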
+  bool DoneCmp = false;
+
+  while (!SearchSpanStack.empty()) {
+    SearchSpan Span = SearchSpanStack.top();
+    SearchSpanStack.pop();
+
+    if (Span.Label != nullptr)
+      Context.insert(Span.Label);
+
+    switch (Span.Size) {
+    case 0:
+      llvm::report_fatal_error("Invalid SearchSpan size");
+      break;
+
+    case 1:
+      lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp,
+                       SearchSpanStack.empty() ? nullptr : DefaultLabel);
+      DoneCmp = false;
+      break;
+
+    case 2:
+      lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp);
+      DoneCmp = false;
+      lowerCaseCluster(CaseClusters[Span.Begin + 1], Comparison, DoneCmp,
+                       SearchSpanStack.empty() ? nullptr : DefaultLabel);
+      break;
+
+    default:
+      // Pick the middle item and branch b or ae
+      SizeT PivotIndex = Span.Begin + (Span.Size / 2);
+      const CaseCluster &Pivot = CaseClusters[PivotIndex];
+      Constant *Value = Ctx->getConstantInt32(Pivot.getLow());
+      // TODO(ascull): what if this jump is too big?
+      typename Traits::Insts::Label *Label =
+          Traits::Insts::Label::create(Func, this);
+      _cmp(Comparison, Value);
+      _br(Traits::Cond::Br_b, Label);
+      // Lower the left and (pivot+right) sides, falling through to the right
+      SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label);
+      SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr);
+      DoneCmp = true;
+      break;
+    }
+  }
+
+  _br(DefaultLabel);
 }
 
 template <class Machine>
diff --git a/src/IceTypes.cpp b/src/IceTypes.cpp
index 2099cb9..dd06b1e 100644
--- a/src/IceTypes.cpp
+++ b/src/IceTypes.cpp
@@ -30,7 +30,7 @@
 
 // Define a temporary set of enum values based on ICETYPE_TABLE
 enum {
-#define X(tag, size, align, elts, elty, str) _table_tag_##tag,
+#define X(tag, sizeLog2, align, elts, elty, str) _table_tag_##tag,
   ICETYPE_TABLE
 #undef X
       _enum_table_tag_Names
@@ -44,7 +44,7 @@
       _enum_props_table_tag_Names
 };
 // Assert that tags in ICETYPE_TABLE are also in ICETYPE_PROPS_TABLE.
-#define X(tag, size, align, elts, elty, str)                                   \
+#define X(tag, sizeLog2, align, elts, elty, str)                               \
   static_assert(                                                               \
       (unsigned)_table_tag_##tag == (unsigned)_props_table_tag_##tag,          \
       "Inconsistency between ICETYPE_PROPS_TABLE and ICETYPE_TABLE");
@@ -63,7 +63,7 @@
 
 // Define constants for each element size in ICETYPE_TABLE.
 enum {
-#define X(tag, size, align, elts, elty, str) _table_elts_##tag = elts,
+#define X(tag, sizeLog2, align, elts, elty, str) _table_elts_##tag = elts,
   ICETYPE_TABLE
 #undef X
       _enum_table_elts_Elements = 0
@@ -83,7 +83,7 @@
 #undef X
 
 struct TypeAttributeFields {
-  size_t TypeWidthInBytes;
+  int8_t TypeWidthInBytesLog2;
   size_t TypeAlignInBytes;
   size_t TypeNumElements;
   Type TypeElementType;
@@ -91,8 +91,8 @@
 };
 
 const struct TypeAttributeFields TypeAttributes[] = {
-#define X(tag, size, align, elts, elty, str)                                   \
-  { size, align, elts, elty, str }                                             \
+#define X(tag, sizeLog2, align, elts, elty, str)                               \
+  { sizeLog2, align, elts, elty, str }                                         \
   ,
     ICETYPE_TABLE
 #undef X
@@ -133,10 +133,15 @@
 }
 
 size_t typeWidthInBytes(Type Ty) {
+  int8_t Shift = typeWidthInBytesLog2(Ty);
+  return (Shift < 0) ? 0 : 1 << Shift;
+}
+
+int8_t typeWidthInBytesLog2(Type Ty) {
   size_t Index = static_cast<size_t>(Ty);
   if (Index < IceType_NUM)
-    return TypeAttributes[Index].TypeWidthInBytes;
-  llvm_unreachable("Invalid type for typeWidthInBytes()");
+    return TypeAttributes[Index].TypeWidthInBytesLog2;
+  llvm_unreachable("Invalid type for typeWidthInBytesLog2()");
   return 0;
 }
 
diff --git a/src/IceTypes.def b/src/IceTypes.def
index 9fd0e13..94877a2 100644
--- a/src/IceTypes.def
+++ b/src/IceTypes.def
@@ -30,24 +30,24 @@
   //#define X(tag, str, is_elf64, e_machine, e_flags)
 
 #define ICETYPE_TABLE                                                    \
-  /* enum value,  size, align, # elts, element type, printable string */ \
-  /*   (size and alignment in bytes) */                                  \
-  X(IceType_void,   0,  0,     1,      IceType_void, "void")             \
-  X(IceType_i1,     1,  1,     1,      IceType_i1,   "i1")               \
-  X(IceType_i8,     1,  1,     1,      IceType_i8,   "i8")               \
-  X(IceType_i16,    2,  1,     1,      IceType_i16,  "i16")              \
-  X(IceType_i32,    4,  1,     1,      IceType_i32,  "i32")              \
-  X(IceType_i64,    8,  1,     1,      IceType_i64,  "i64")              \
-  X(IceType_f32,    4,  4,     1,      IceType_f32,  "float")            \
-  X(IceType_f64,    8,  8,     1,      IceType_f64,  "double")           \
-  X(IceType_v4i1,  16,  1,     4,      IceType_i1,   "<4 x i1>")         \
-  X(IceType_v8i1,  16,  1,     8,      IceType_i1,   "<8 x i1>")         \
-  X(IceType_v16i1, 16,  1,    16,      IceType_i1,   "<16 x i1>")        \
-  X(IceType_v16i8, 16,  1,    16,      IceType_i8,   "<16 x i8>")        \
-  X(IceType_v8i16, 16,  2,     8,      IceType_i16,  "<8 x i16>")        \
-  X(IceType_v4i32, 16,  4,     4,      IceType_i32,  "<4 x i32>")        \
-  X(IceType_v4f32, 16,  4,     4,      IceType_f32,  "<4 x float>")      \
-//#define X(tag, size, align, elts, elty, str)
+  /* enum value, log_2(size), align, # elts, element type, printable */  \
+  /*     string (size and alignment in bytes) */                         \
+  X(IceType_void,  -1,  0,     1,      IceType_void, "void")             \
+  X(IceType_i1,     0,  1,     1,      IceType_i1,   "i1")               \
+  X(IceType_i8,     0,  1,     1,      IceType_i8,   "i8")               \
+  X(IceType_i16,    1,  1,     1,      IceType_i16,  "i16")              \
+  X(IceType_i32,    2,  1,     1,      IceType_i32,  "i32")              \
+  X(IceType_i64,    3,  1,     1,      IceType_i64,  "i64")              \
+  X(IceType_f32,    2,  4,     1,      IceType_f32,  "float")            \
+  X(IceType_f64,    3,  8,     1,      IceType_f64,  "double")           \
+  X(IceType_v4i1,   4,  1,     4,      IceType_i1,   "<4 x i1>")         \
+  X(IceType_v8i1,   4,  1,     8,      IceType_i1,   "<8 x i1>")         \
+  X(IceType_v16i1,  4,  1,    16,      IceType_i1,   "<16 x i1>")        \
+  X(IceType_v16i8,  4,  1,    16,      IceType_i8,   "<16 x i8>")        \
+  X(IceType_v8i16,  4,  2,     8,      IceType_i16,  "<8 x i16>")        \
+  X(IceType_v4i32,  4,  4,     4,      IceType_i32,  "<4 x i32>")        \
+  X(IceType_v4f32,  4,  4,     4,      IceType_f32,  "<4 x float>")      \
+//#define X(tag, sizeLog2, align, elts, elty, str)
 
 // Dictionary:
 //   V - Is vector type.
diff --git a/src/IceTypes.h b/src/IceTypes.h
index 1430094..da701f4 100644
--- a/src/IceTypes.h
+++ b/src/IceTypes.h
@@ -23,7 +23,7 @@
 namespace Ice {
 
 enum Type {
-#define X(tag, size, align, elts, elty, str) tag,
+#define X(tag, sizeLog2, align, elts, elty, str) tag,
   ICETYPE_TABLE
 #undef X
       IceType_NUM
@@ -60,6 +60,7 @@
 enum OptLevel { Opt_m1, Opt_0, Opt_1, Opt_2 };
 
 size_t typeWidthInBytes(Type Ty);
+int8_t typeWidthInBytesLog2(Type Ty);
 size_t typeAlignInBytes(Type Ty);
 size_t typeNumElements(Type Ty);
 Type typeElementType(Type Ty);
diff --git a/tests_lit/llvm2ice_tests/adv-switch-opt.ll b/tests_lit/llvm2ice_tests/adv-switch-opt.ll
new file mode 100644
index 0000000..86b7536
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/adv-switch-opt.ll
@@ -0,0 +1,191 @@
+; This tests the advanced lowering of switch statements. The advanced lowering
+; uses jump tables, range tests and binary search.
+
+; RUN: %p2i -i %s --filetype=asm --assemble --disassemble --args --adv-switch \
+; RUN:   -O2 | FileCheck %s
+
+; Dense but non-contiguous ranges should be converted into a jump table.
+define internal i32 @testJumpTable(i32 %a) {
+entry:
+  switch i32 %a, label %sw.default [
+    i32 91, label %sw.default
+    i32 92, label %sw.bb1
+    i32 93, label %sw.default
+    i32 99, label %sw.bb1
+    i32 98, label %sw.default
+    i32 96, label %sw.bb1
+    i32 97, label %sw.epilog
+  ]
+
+sw.default:
+  %add = add i32 %a, 27
+  br label %sw.epilog
+
+sw.bb1:
+  %tmp = add i32 %a, 16
+  br label %sw.epilog
+
+sw.epilog:
+  %result.1 = phi i32 [ %add, %sw.default ], [ %tmp, %sw.bb1 ], [ 17, %entry ]
+  ret i32 %result.1
+}
+; CHECK-LABEL: testJumpTable
+; CHECK: sub [[IND:[^,]+]],0x5b
+; CHECK-NEXT: cmp [[IND]],0x8
+; CHECK-NEXT: ja
+; CHECK-NEXT: mov [[BASE:[^,]+]],0x0 {{[0-9a-f]+}}: R_386_32 .rodata.testJumpTable$jumptable
+; CHECK-NEXT: mov {{.*}},DWORD PTR {{\[}}[[BASE]]+[[IND]]*4]
+; CHECK-NEXT: jmp
+
+; Contiguous ranges which map to the same target should be grouped and
+; efficiently tested.
+define internal i32 @testRangeTest() {
+entry:
+  switch i32 10, label %sw.default [
+    i32 0, label %sw.epilog
+    i32 1, label %sw.epilog
+    i32 2, label %sw.epilog
+    i32 3, label %sw.epilog
+    i32 10, label %sw.bb1
+    i32 11, label %sw.bb1
+    i32 12, label %sw.bb1
+    i32 13, label %sw.bb1
+  ]
+
+sw.default:
+  br label %sw.epilog
+
+sw.bb1:
+  br label %sw.epilog
+
+sw.epilog:
+  %result.1 = phi i32 [ 23, %sw.default ], [ 42, %sw.bb1 ], [ 17, %entry ], [ 17, %entry ], [ 17, %entry ], [ 17, %entry ]
+  ret i32 %result.1
+}
+; CHECK-LABEL: testRangeTest
+; CHECK: cmp {{.*}},0x3
+; CHECK-NEXT: jbe
+; CHECK: sub [[REG:[^,]*]],0xa
+; CHECK-NEXT: cmp [[REG]],0x3
+; CHECK-NEXT: jbe
+; CHECK-NEXT: jmp
+
+; Sparse cases should be searched with a binary search.
+define internal i32 @testBinarySearch() {
+entry:
+  switch i32 10, label %sw.default [
+    i32 0, label %sw.epilog
+    i32 10, label %sw.epilog
+    i32 20, label %sw.bb1
+    i32 30, label %sw.bb1
+  ]
+
+sw.default:
+  br label %sw.epilog
+
+sw.bb1:
+  br label %sw.epilog
+
+sw.epilog:
+  %result.1 = phi i32 [ 23, %sw.default ], [ 42, %sw.bb1 ], [ 17, %entry ], [ 17, %entry ]
+  ret i32 %result.1
+}
+; CHECK-LABEL: testBinarySearch
+; CHECK: cmp {{.*}},0x14
+; CHECK-NEXT: jb
+; CHECK-NEXT: je
+; CHECK-NEXT: cmp {{.*}},0x1e
+; CHECK-NEXT: je
+; CHECK-NEXT: jmp
+; CHECK-NEXT: cmp {{.*}},0x0
+; CHECK-NEXT: je
+; CHECK-NEXT: cmp {{.*}},0xa
+; CHECK-NEXT: je
+; CHECK-NEXT: jmp
+
+; 64-bit switches where the cases are all 32-bit values should be reduced to a
+; 32-bit switch after checking the upper 32 bits are 0.
+define internal i32 @testSwitchSmall64(i64 %a) {
+entry:
+  switch i64 %a, label %sw.default [
+    i64 123, label %return
+    i64 234, label %sw.bb1
+    i64 345, label %sw.bb2
+    i64 456, label %sw.bb3
+  ]
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.bb3:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 5, %sw.default ], [ 4, %sw.bb3 ], [ 3, %sw.bb2 ], [ 2, %sw.bb1 ], [ 1, %entry ]
+  ret i32 %retval.0
+}
+; CHECK-LABEL: testSwitchSmall64
+; CHECK: cmp {{.*}},0x0
+; CHECK-NEXT: jne
+; CHECK-NEXT: cmp {{.*}},0x159
+; CHECK-NEXT: jb
+; CHECK-NEXT: je
+; CHECK-NEXT: cmp {{.*}},0x1c8
+; CHECK-NEXT: je
+; CHECK-NEXT: jmp
+; CHECK-NEXT: cmp {{.*}},0x7b
+; CHECK-NEXT: je
+; CHECK-NEXT: cmp {{.*}},0xea
+; CHECK-NEXT: je
+; CHECK-NEXT: jmp
+
+; Test for correct 64-bit lowering.
+; TODO(ascull): this should generate better code like the 32-bit version
+define internal i32 @testSwitch64(i64 %a) {
+entry:
+  switch i64 %a, label %sw.default [
+    i64 123, label %return
+    i64 234, label %sw.bb1
+    i64 345, label %sw.bb2
+    i64 78187493520, label %sw.bb3
+  ]
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.bb3:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 5, %sw.default ], [ 4, %sw.bb3 ], [ 3, %sw.bb2 ], [ 2, %sw.bb1 ], [ 1, %entry ]
+  ret i32 %retval.0
+}
+; CHECK-LABEL: testSwitch64
+; CHECK: cmp {{.*}},0x7b
+; CHECK-NEXT: jne
+; CHECK-NEXT: cmp {{.*}},0x0
+; CHECK-NEXT: je
+; CHECK: cmp {{.*}},0xea
+; CHECK-NEXT: jne
+; CHECK-NEXT: cmp {{.*}},0x0
+; CHECK-NEXT: je
+; CHECK: cmp {{.*}},0x159
+; CHECK-NEXT: jne
+; CHECK-NEXT: cmp {{.*}},0x0
+; CHECK-NEXT: je
+; CHECK: cmp {{.*}},0x34567890
+; CHECK-NEXT: jne
+; CHECK-NEXT: cmp {{.*}},0x12
+; CHECK-NEXT: je