Subzero: Add branch optimization.

1. Unconditional branch to the next basic block is removed.

2. For a conditional branch with a "false" edge to the next basic block, remove the unconditional branch to the fallthrough block.

3. For a conditional branch with a "true" edge to the next basic block, invert the condition and do like #2.

This is enabled only for O2, particularly because inverting the branch condition is a marginally risky operation.

This decreases the instruction count by about 5-6%.

Also, --stats prints a final tally to make it easier to post-process the output.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/580903005
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index d699ef5..4439323 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -290,6 +290,14 @@
   return Valid;
 }
 
+void Cfg::doBranchOpt() {
+  for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+    NodeList::iterator NextNode = I;
+    ++NextNode;
+    (*I)->doBranchOpt(*NextNode);
+  }
+}
+
 // ======================== Dump routines ======================== //
 
 void Cfg::emit() {
diff --git a/src/IceCfg.h b/src/IceCfg.h
index e12bc9d..a2582eb 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -94,6 +94,7 @@
   void livenessLightweight();
   void liveness(LivenessMode Mode);
   bool validateLiveness() const;
+  void doBranchOpt();
 
   // Manage the CurrentNode field, which is used for validating the
   // Variable::DefNode field during dumping/emitting.
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 4de2f57..57aba6b 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -457,6 +457,19 @@
   }
 }
 
+void CfgNode::doBranchOpt(const CfgNode *NextNode) {
+  TargetLowering *Target = Func->getTarget();
+  // Check every instruction for a branch optimization opportunity.
+  // It may be more efficient to iterate in reverse and stop after the
+  // first opportunity, unless there is some target lowering where we
+  // have the possibility of multiple such optimizations per block
+  // (currently not the case for x86 lowering).
+  for (InstList::const_iterator I = Insts.begin(), E = Insts.end(); I != E;
+       ++I) {
+    Target->doBranchOpt(*I, NextNode);
+  }
+}
+
 // ======================== Dump routines ======================== //
 
 void CfgNode::emit(Cfg *Func) const {
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index 954977f..eba98c0 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -65,6 +65,7 @@
   void livenessLightweight();
   bool liveness(Liveness *Liveness);
   void livenessPostprocess(LivenessMode Mode, Liveness *Liveness);
+  void doBranchOpt(const CfgNode *NextNode);
   void emit(Cfg *Func) const;
   void dump(Cfg *Func) const;
 
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 47c7aa8..319e18a 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -384,10 +384,14 @@
   llvm_unreachable("Unknown type");
 }
 
-void GlobalContext::dumpStats(const IceString &Name) {
+void GlobalContext::dumpStats(const IceString &Name, bool Final) {
   if (Flags.DumpStats) {
-    StatsFunction.dump(Name, getStrDump());
-    StatsCumulative.dump("_TOTAL_", getStrDump());
+    if (Final) {
+      StatsCumulative.dump(Name, getStrDump());
+    } else {
+      StatsFunction.dump(Name, getStrDump());
+      StatsCumulative.dump("_TOTAL_", getStrDump());
+    }
   }
 }
 
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index da4b6a1..f997b65 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -132,7 +132,7 @@
 
   // Reset stats at the beginning of a function.
   void resetStats() { StatsFunction.reset(); }
-  void dumpStats(const IceString &Name);
+  void dumpStats(const IceString &Name, bool Final = false);
   void statsUpdateEmitted(uint32_t InstCount) {
     StatsFunction.updateEmitted(InstCount);
     StatsCumulative.updateEmitted(InstCount);
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index a61805e..df28696 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -24,11 +24,12 @@
 namespace {
 
 const struct InstX8632BrAttributes_ {
+  InstX8632::BrCond Opposite;
   const char *DisplayString;
   const char *EmitString;
 } InstX8632BrAttributes[] = {
-#define X(tag, dump, emit)                                                     \
-  { dump, emit }                                                               \
+#define X(tag, opp, dump, emit)                                                \
+  { InstX8632::opp, dump, emit }                                               \
   ,
     ICEINSTX8632BR_TABLE
 #undef X
@@ -128,11 +129,52 @@
   return ".L" + Func->getFunctionName() + "$local$__" + buf;
 }
 
-InstX8632Br::InstX8632Br(Cfg *Func, CfgNode *TargetTrue, CfgNode *TargetFalse,
-                         InstX8632Label *Label, InstX8632::BrCond Condition)
+InstX8632Br::InstX8632Br(Cfg *Func, const CfgNode *TargetTrue,
+                         const CfgNode *TargetFalse,
+                         const InstX8632Label *Label,
+                         InstX8632::BrCond Condition)
     : InstX8632(Func, InstX8632::Br, 0, NULL), Condition(Condition),
       TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {}
 
+bool InstX8632Br::optimizeBranch(const CfgNode *NextNode) {
+  // If there is no next block, then there can be no fallthrough to
+  // optimize.
+  if (NextNode == NULL)
+    return false;
+  // Intra-block conditional branches can't be optimized.
+  if (Label)
+    return false;
+  // If there is no fallthrough node, such as a non-default case label
+  // for a switch instruction, then there is no opportunity to
+  // optimize.
+  if (getTargetFalse() == NULL)
+    return false;
+
+  // Unconditional branch to the next node can be removed.
+  if (Condition == Br_None && getTargetFalse() == NextNode) {
+    assert(getTargetTrue() == NULL);
+    setDeleted();
+    return true;
+  }
+  // If the fallthrough is to the next node, set fallthrough to NULL
+  // to indicate.
+  if (getTargetFalse() == NextNode) {
+    TargetFalse = NULL;
+    return true;
+  }
+  // If TargetTrue is the next node, and TargetFalse is non-NULL
+  // (which was already tested above), then invert the branch
+  // condition, swap the targets, and set new fallthrough to NULL.
+  if (getTargetTrue() == NextNode) {
+    assert(Condition != Br_None);
+    Condition = InstX8632BrAttributes[Condition].Opposite;
+    TargetTrue = getTargetFalse();
+    TargetFalse = NULL;
+    return true;
+  }
+  return false;
+}
+
 InstX8632Call::InstX8632Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
     : InstX8632(Func, InstX8632::Call, 1, Dest) {
   HasSideEffects = true;
diff --git a/src/IceInstX8632.def b/src/IceInstX8632.def
index 932500c..16c6fb8 100644
--- a/src/IceInstX8632.def
+++ b/src/IceInstX8632.def
@@ -51,20 +51,20 @@
 //#define X(val, name)
 
 #define ICEINSTX8632BR_TABLE   \
-  /* enum value, dump, emit */ \
-  X(Br_a,        "a",  "ja")   \
-  X(Br_ae,       "ae", "jae")  \
-  X(Br_b,        "b",  "jb")   \
-  X(Br_be,       "be", "jbe")  \
-  X(Br_e,        "e",  "je")   \
-  X(Br_g,        "g",  "jg")   \
-  X(Br_ge,       "ge", "jge")  \
-  X(Br_l,        "l",  "jl")   \
-  X(Br_le,       "le", "jle")  \
-  X(Br_ne,       "ne", "jne")  \
-  X(Br_np,       "np", "jnp")  \
-  X(Br_p,        "p",  "jp")   \
-//#define X(tag, dump, emit)
+  /* enum value, opposite, dump, emit */ \
+  X(Br_a,        Br_be,    "a",  "ja")                \
+  X(Br_ae,       Br_b,     "ae", "jae")               \
+  X(Br_b,        Br_ae,    "b",  "jb")                \
+  X(Br_be,       Br_a,     "be", "jbe")               \
+  X(Br_e,        Br_ne,    "e",  "je")                \
+  X(Br_g,        Br_le,    "g",  "jg")                \
+  X(Br_ge,       Br_l,     "ge", "jge")               \
+  X(Br_l,        Br_ge,    "l",  "jl")                \
+  X(Br_le,       Br_g,     "le", "jle")               \
+  X(Br_ne,       Br_e,     "ne", "jne")               \
+  X(Br_np,       Br_p,     "np", "jnp")               \
+  X(Br_p,        Br_np,    "p",  "jp")                \
+//#define X(tag, opp, dump, emit)
 
 #define ICEINSTX8632CMPPS_TABLE \
   /* enum value, emit */        \
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index f0558db..09615ce 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -225,7 +225,7 @@
   };
 
   enum BrCond {
-#define X(tag, dump, emit) tag,
+#define X(tag, opp, dump, emit) tag,
     ICEINSTX8632BR_TABLE
 #undef X
         Br_None
@@ -309,53 +309,62 @@
   // Create a conditional branch to a node.
   static InstX8632Br *create(Cfg *Func, CfgNode *TargetTrue,
                              CfgNode *TargetFalse, BrCond Condition) {
+    const InstX8632Label *NoLabel = NULL;
     return new (Func->allocate<InstX8632Br>())
-        InstX8632Br(Func, TargetTrue, TargetFalse, NULL, Condition);
+        InstX8632Br(Func, TargetTrue, TargetFalse, NoLabel, Condition);
   }
   // Create an unconditional branch to a node.
   static InstX8632Br *create(Cfg *Func, CfgNode *Target) {
+    const CfgNode *NoCondTarget = NULL;
+    const InstX8632Label *NoLabel = NULL;
     return new (Func->allocate<InstX8632Br>())
-        InstX8632Br(Func, NULL, Target, NULL, Br_None);
+        InstX8632Br(Func, NoCondTarget, Target, NoLabel, Br_None);
   }
   // Create a non-terminator conditional branch to a node, with a
   // fallthrough to the next instruction in the current node.  This is
   // used for switch lowering.
   static InstX8632Br *create(Cfg *Func, CfgNode *Target, BrCond Condition) {
+    const CfgNode *NoUncondTarget = NULL;
+    const InstX8632Label *NoLabel = NULL;
     return new (Func->allocate<InstX8632Br>())
-        InstX8632Br(Func, Target, NULL, NULL, Condition);
+        InstX8632Br(Func, Target, NoUncondTarget, NoLabel, Condition);
   }
   // Create a conditional intra-block branch (or unconditional, if
   // Condition==Br_None) to a label in the current block.
   static InstX8632Br *create(Cfg *Func, InstX8632Label *Label,
                              BrCond Condition) {
+    const CfgNode *NoCondTarget = NULL;
+    const CfgNode *NoUncondTarget = NULL;
     return new (Func->allocate<InstX8632Br>())
-        InstX8632Br(Func, NULL, NULL, Label, Condition);
+        InstX8632Br(Func, NoCondTarget, NoUncondTarget, Label, Condition);
   }
-  CfgNode *getTargetTrue() const { return TargetTrue; }
-  CfgNode *getTargetFalse() const { return TargetFalse; }
+  const CfgNode *getTargetTrue() const { return TargetTrue; }
+  const CfgNode *getTargetFalse() const { return TargetFalse; }
+  bool optimizeBranch(const CfgNode *NextNode);
   virtual uint32_t getEmitInstCount() const {
+    uint32_t Sum = 0;
     if (Label)
-      return 1;
-    if (Condition == Br_None)
-      return 1;
+      ++Sum;
+    if (getTargetTrue())
+      ++Sum;
     if (getTargetFalse())
-      return 2;
-    return 1;
+      ++Sum;
+    return Sum;
   }
   virtual void emit(const Cfg *Func) const;
   virtual void dump(const Cfg *Func) const;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Br); }
 
 private:
-  InstX8632Br(Cfg *Func, CfgNode *TargetTrue, CfgNode *TargetFalse,
-              InstX8632Label *Label, BrCond Condition);
+  InstX8632Br(Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
+              const InstX8632Label *Label, BrCond Condition);
   InstX8632Br(const InstX8632Br &) LLVM_DELETED_FUNCTION;
   InstX8632Br &operator=(const InstX8632Br &) LLVM_DELETED_FUNCTION;
   virtual ~InstX8632Br() {}
   BrCond Condition;
-  CfgNode *TargetTrue;
-  CfgNode *TargetFalse;
-  InstX8632Label *Label; // Intra-block branch target
+  const CfgNode *TargetTrue;
+  const CfgNode *TargetFalse;
+  const InstX8632Label *Label; // Intra-block branch target
 };
 
 // AdjustStack instruction - subtracts esp by the given amount and
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index b15c4f1..7383fff 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -125,6 +125,11 @@
   void doNopInsertion();
   // Lowers a single instruction.
   void lower();
+  // Tries to do branch optimization on a single instruction.  Returns
+  // true if some optimization was done.
+  virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) {
+    return false;
+  }
 
   // Returns a variable pre-colored to the specified physical
   // register.  This is generally used to get very direct access to
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 464a2e8..31c11d4 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -395,6 +395,14 @@
   T_genFrame.printElapsedUs(Context, "genFrame()");
   Func->dump("After stack frame mapping");
 
+  // Branch optimization.  This needs to be done just before code
+  // emission.  In particular, no transformations that insert or
+  // reorder CfgNodes should be done after branch optimization.  We go
+  // ahead and do it before nop insertion to reduce the amount of work
+  // needed for searching for opportunities.
+  Func->doBranchOpt();
+  Func->dump("After branch optimization");
+
   // Nop insertion
   if (shouldDoNopInsertion()) {
     Func->doNopInsertion();
@@ -444,6 +452,13 @@
   }
 }
 
+bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
+  if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
+    return Br->optimizeBranch(NextNode);
+  }
+  return false;
+}
+
 IceString TargetX8632::RegNames[] = {
 #define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
           frameptr, isI8, isInt, isFP)                                         \
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 99235de..a104dac 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -28,6 +28,7 @@
 
   virtual void translateOm1();
   virtual void translateO2();
+  virtual bool doBranchOpt(Inst *I, const CfgNode *NextNode);
 
   virtual Variable *getPhysicalRegister(SizeT RegNum);
   virtual IceString getRegName(SizeT RegNum, Type Ty) const;
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index 609804c..1353b07 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -2247,7 +2247,6 @@
            << "\n";
     ErrorStatus = true;
   }
-  return;
 }
 
 } // end of namespace Ice
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index 3cb13b3..f147877 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -166,10 +166,11 @@
   Ice::GlobalContext Ctx(Ls, Os, VMask, TargetArch, OptLevel, TestPrefix,
                          Flags);
 
+  int ErrorStatus = 0;
   if (BuildOnRead) {
     Ice::PNaClTranslator Translator(&Ctx, Flags);
     Translator.translate(IRFilename);
-    return Translator.getErrorStatus();
+    ErrorStatus = Translator.getErrorStatus();
   } else {
     // Parse the input LLVM IR file into a module.
     SMDiagnostic Err;
@@ -189,6 +190,9 @@
 
     Ice::Converter Converter(Mod, &Ctx, Flags);
     Converter.convertToIce();
-    return Converter.getErrorStatus();
+    ErrorStatus = Converter.getErrorStatus();
   }
+  const bool FinalStats = true;
+  Ctx.dumpStats("_FINAL_", FinalStats);
+  return ErrorStatus;
 }
diff --git a/tests_lit/llvm2ice_tests/branch-opt.ll b/tests_lit/llvm2ice_tests/branch-opt.ll
new file mode 100644
index 0000000..cb716dd
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/branch-opt.ll
@@ -0,0 +1,104 @@
+; Tests the branch optimizations under O2 (against a lack of
+; optimizations under Om1).
+
+; RUN: %llvm2ice -O2 --verbose none %s \
+; RUN:   | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
+; RUN:   | llvm-objdump -d -symbolize -x86-asm-syntax=intel - \
+; RUN:   | FileCheck --check-prefix=O2 %s
+; RUN: %llvm2ice -Om1 --verbose none %s \
+; RUN:   | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
+; RUN:   | llvm-objdump -d -symbolize -x86-asm-syntax=intel - \
+; RUN:   | FileCheck --check-prefix=OM1 %s
+; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
+; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
+; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
+; RUN:                           | FileCheck --check-prefix=DUMP %s
+
+declare void @dummy()
+
+; An unconditional branch to the next block should be removed.
+define void @testUncondToNextBlock() {
+entry:
+  call void @dummy()
+  br label %next
+next:
+  call void @dummy()
+  ret void
+}
+; O2-LABEL: testUncondToNextBlock
+; O2: call
+; O2-NEXT: call
+
+; OM1-LABEL: testUncondToNextBlock
+; OM1: call
+; OM1-NEXT: jmp
+; OM1-NEXT: call
+
+; For a conditional branch with a fallthrough to the next block, the
+; fallthrough branch should be removed.
+define void @testCondFallthroughToNextBlock(i32 %arg) {
+entry:
+  %cmp = icmp sge i32 %arg, 123
+  br i1 %cmp, label %target, label %fallthrough
+fallthrough:
+  call void @dummy()
+  ret void
+target:
+  call void @dummy()
+  ret void
+}
+; O2-LABEL: testCondFallthroughToNextBlock
+; O2: cmp {{.*}}, 123
+; O2-NEXT: jge
+; O2-NEXT: call
+; O2: ret
+; O2: call
+; O2: ret
+
+; OM1-LABEL: testCondFallthroughToNextBlock
+; OM1: cmp {{.*}}, 123
+; OM1: jge
+; OM1: cmp
+; OM1: jne
+; OM1: jmp
+; OM1: call
+; OM1: ret
+; OM1: call
+; OM1: ret
+
+; For a conditional branch with the next block as the target and a
+; different block as the fallthrough, the branch condition should be
+; inverted, the fallthrough block changed to the target, and the
+; branch to the next block removed.
+define void @testCondTargetNextBlock(i32 %arg) {
+entry:
+  %cmp = icmp sge i32 %arg, 123
+  br i1 %cmp, label %fallthrough, label %target
+fallthrough:
+  call void @dummy()
+  ret void
+target:
+  call void @dummy()
+  ret void
+}
+; O2-LABEL: testCondTargetNextBlock
+; O2: cmp {{.*}}, 123
+; O2-NEXT: jl
+; O2-NEXT: call
+; O2: ret
+; O2: call
+; O2: ret
+
+; OM1-LABEL: testCondTargetNextBlock
+; OM1: cmp {{.*}}, 123
+; OM1: jge
+; OM1: cmp
+; OM1: jne
+; OM1: jmp
+; OM1: call
+; OM1: ret
+; OM1: call
+; OM1: ret
+
+; ERRORS-NOT: ICE translation error
+; DUMP-NOT: SZ
diff --git a/tests_lit/llvm2ice_tests/ebp_args.ll b/tests_lit/llvm2ice_tests/ebp_args.ll
index 8360bf9..03264fe 100644
--- a/tests_lit/llvm2ice_tests/ebp_args.ll
+++ b/tests_lit/llvm2ice_tests/ebp_args.ll
@@ -5,7 +5,7 @@
 
 ; RUN: %llvm2ice -Om1 --target=x8632 --verbose none %s | FileCheck %s
 
-declare i32 @memcpy_helper2(i32 %buf, i32 %buf2, i32 %n);
+declare i32 @memcpy_helper2(i32 %buf, i32 %buf2, i32 %n)
 
 define i32 @memcpy_helper(i32 %buf, i32 %n) {
 entry:
diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll b/tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll
index 01a4571..7207c8e 100644
--- a/tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll
@@ -20,7 +20,7 @@
 ; reuse the flags set by the cmpxchg instruction itself.
 ; This is only expected to work w/ O2, based on lightweight liveness.
 ; (Or if we had other means to detect the only use).
-declare void @use_value(i32);
+declare void @use_value(i32)
 
 define i32 @test_atomic_cmpxchg_loop(i32 %iptr, i32 %expected, i32 %desired) {
 entry:
@@ -46,7 +46,7 @@
 ; Make sure the phi assignment for succeeded_first_try is still there.
 ; O2: mov {{.*}}, 2
 ; O2-NOT: cmp
-; O2: je
+; O2: jne
 ; Make sure the call isn't accidentally deleted.
 ; O2: call
 ;
@@ -78,7 +78,7 @@
 ; O2: lock
 ; O2-NEXT: cmpxchg dword ptr [e{{[^a].}}], e{{[^a]}}
 ; O2-NOT: cmp
-; O2: je
+; O2: jne
 
 
 ; Still works if the compare operands are constants.
@@ -102,7 +102,7 @@
 ; Should be using NEXT: see issue 3929
 ; O2: cmpxchg dword ptr [e{{[^a].}}], e{{[^a]}}
 ; O2-NOT: cmp
-; O2: je
+; O2: jne
 
 ; This is a case where the flags cannot be reused (compare is for some
 ; other condition).
@@ -126,7 +126,7 @@
 ; O2-NEXT: cmpxchg dword ptr [e{{[^a].}}], e{{[^a]}}
 ; O2: mov {{.*}}
 ; O2: cmp
-; O2: jg
+; O2: jle
 
 ; Another case where the flags cannot be reused (the comparison result
 ; is used somewhere else).
diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll b/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
index 0ad2e91..8ca3180 100644
--- a/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
@@ -213,8 +213,7 @@
 }
 ; CHECK-LABEL: could_have_hoisted_loads
 ; CHECK: dword ptr [0]
-; CHECK: je {{.*}}
-; CHECK: jmp {{.*}}
+; CHECK: jne {{.*}}
 ; CHECK: mov {{.*}}, dword ptr
 ; CHECK: ret
 ; CHECK: mfence
diff --git a/tests_lit/llvm2ice_tests/phi.ll b/tests_lit/llvm2ice_tests/phi.ll
index f9ec1ce..f0da656 100644
--- a/tests_lit/llvm2ice_tests/phi.ll
+++ b/tests_lit/llvm2ice_tests/phi.ll
@@ -30,10 +30,8 @@
 ; CHECK: mov {{.*}}, 0
 ; CHECK: mov [[PHI:.*]],
 ; CHECK: cmp {{.*}}, 0
-; CHECK: jne
-; CHECK: :
+; CHECK: je
 ; CHECK: mov [[PHI]], 0
-; CHECK: :
 ; CHECK: movzx {{.*}}, [[PHI]]
 
 define internal i32 @testPhi2(i32 %arg) {
@@ -50,10 +48,8 @@
 ; CHECK-LABEL: testPhi2
 ; CHECK: mov {{.*}}, 12345
 ; CHECK: cmp {{.*}}, 0
-; CHECK-NEXT: jg
-; CHECK: :
+; CHECK-NEXT: jle
 ; CHECK: mov [[PHI:.*]], 54321
-; CHECK: :
 ; CHECK: mov {{.*}}, [[PHI]]
 
 ; ERRORS-NOT: ICE translation error
@@ -100,11 +96,9 @@
 ; CHECK-LABEL: testPhi3
 ; CHECK: push [[EBX:.*]]
 ; CHECK: mov {{.*}}, dword ptr [esp
-; CHECK: jmp
 ; CHECK: mov
 ; CHECK: mov {{.*}}[[ADDR:.*1000]]
 ; CHECK: cmp {{.*}}, 0
-; CHECK: je
-; CHECK: jmp
+; CHECK: jne
 ; CHECK: mov {{.*}}[[ADDR]]
 ; CHECK: pop [[EBX]]
diff --git a/tests_lit/llvm2ice_tests/simple-loop.ll b/tests_lit/llvm2ice_tests/simple-loop.ll
index dd94a57..b12f979 100644
--- a/tests_lit/llvm2ice_tests/simple-loop.ll
+++ b/tests_lit/llvm2ice_tests/simple-loop.ll
@@ -38,10 +38,9 @@
 ; CHECK-LABEL: simple_loop
 ; CHECK:      mov ecx, dword ptr [esp{{.*}}+{{.*}}{{[0-9]+}}]
 ; CHECK:      cmp ecx, 0
-; CHECK-NEXT: jg {{[0-9]}}
+; CHECK-NEXT: jle {{[0-9]}}
 ; NaCl bundle padding
 ; CHECK-NEXT: nop
-; CHECK-NEXT: jmp {{[0-9]}}
 
 ; TODO: the mov from ebx to esi seems redundant here - so this may need to be
 ; modified later