[SystemZ] Reuse CC results for integer comparisons with zero

This also fixes a bug in the predication of LR to LOCR: I'd forgotten
that with these in-place instruction builds, the implicit operands need
to be added manually.  I think this was latent until now, but is tested
by int-cmp-45.c.  It also adds a CC valid mask to STOC, again tested by
int-cmp-45.c.

llvm-svn: 187573
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index ffd842d..6acdcd4 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1813,7 +1813,8 @@
     if (Invert)
       CCMask ^= CCValid;
     BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
-      .addReg(SrcReg).addOperand(Base).addImm(Disp).addImm(CCMask);
+      .addReg(SrcReg).addOperand(Base).addImm(Disp)
+      .addImm(CCValid).addImm(CCMask);
     MI->eraseFromParent();
     return MBB;
   }
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 915891d..9883714 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -61,12 +61,41 @@
   // The access size of all memory operands in bytes, or 0 if not known.
   bits<5> AccessBytes = 0;
 
-  let TSFlags{0} = SimpleBDXLoad;
-  let TSFlags{1} = SimpleBDXStore;
-  let TSFlags{2} = Has20BitOffset;
-  let TSFlags{3} = HasIndex;
-  let TSFlags{4} = Is128Bit;
-  let TSFlags{9-5} = AccessBytes;
+  // If the instruction sets CC to a useful value, this gives the mask
+  // of all possible CC results.  The mask has the same form as
+  // SystemZ::CCMASK_*.
+  bits<4> CCValues = 0;
+
+  // True if the instruction sets CC to 0 when the result is 0.
+  bit CCHasZero = 0;
+
+  // True if the instruction sets CC to 1 when the result is less than 0
+  // and to 2 when the result is greater than 0.
+  bit CCHasOrder = 0;
+
+  // True if the instruction is conditional and if the CC mask operand
+  // comes first (as for BRC, etc.).
+  bit CCMaskFirst = 0;
+
+  // Similar, but true if the CC mask operand comes last (as for LOC, etc.).
+  bit CCMaskLast = 0;
+
+  // True if the instruction is the "logical" rather than "arithmetic" form,
+  // in cases where a distinction exists.
+  bit IsLogical = 0;
+
+  let TSFlags{0}     = SimpleBDXLoad;
+  let TSFlags{1}     = SimpleBDXStore;
+  let TSFlags{2}     = Has20BitOffset;
+  let TSFlags{3}     = HasIndex;
+  let TSFlags{4}     = Is128Bit;
+  let TSFlags{9-5}   = AccessBytes;
+  let TSFlags{13-10} = CCValues;
+  let TSFlags{14}    = CCHasZero;
+  let TSFlags{15}    = CCHasOrder;
+  let TSFlags{16}    = CCMaskFirst;
+  let TSFlags{17}    = CCMaskLast;
+  let TSFlags{18}    = IsLogical;
 }
 
 //===----------------------------------------------------------------------===//
@@ -623,11 +652,12 @@
 class CondStoreRSY<string mnemonic, bits<16> opcode,
                    RegisterOperand cls, bits<5> bytes,
                    AddressingMode mode = bdaddr20only>
-  : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$R3),
+  : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3),
             mnemonic#"$R3\t$R1, $BD2", []>,
     Requires<[FeatureLoadStoreOnCond]> {
   let mayStore = 1;
   let AccessBytes = bytes;
+  let CCMaskLast = 1;
 }
 
 // Like CondStoreRSY, but used for the raw assembly form.  The condition-code
@@ -686,7 +716,9 @@
                    RegisterOperand cls2>
   : InstRRF<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$R3),
             mnemonic#"r$R3\t$R1, $R2", []>,
-    Requires<[FeatureLoadStoreOnCond]>;
+    Requires<[FeatureLoadStoreOnCond]> {
+  let CCMaskLast = 1;
+}
 
 // Like CondUnaryRRF, but used for the raw assembly form.  The condition-code
 // mask is the third operand rather than being part of the mnemonic.
@@ -748,6 +780,7 @@
   let DisableEncoding = "$R1src";
   let mayLoad = 1;
   let AccessBytes = bytes;
+  let CCMaskLast = 1;
 }
 
 // Like CondUnaryRSY, but used for the raw assembly form.  The condition-code
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 2b604a9..9913db7 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -341,7 +341,8 @@
     if (unsigned CondOpcode = getConditionalMove(Opcode)) {
       MI->setDesc(get(CondOpcode));
       MachineInstrBuilder(*MI->getParent()->getParent(), MI)
-        .addImm(CCValid).addImm(CCMask);
+        .addImm(CCValid).addImm(CCMask)
+        .addReg(SystemZ::CC, RegState::Implicit); // implicit operand, added by hand
       return true;
     }
   }
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 917ac6e..763a395 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -28,17 +28,27 @@
 namespace SystemZII {
   enum {
     // See comments in SystemZInstrFormats.td.
-    SimpleBDXLoad  = (1 << 0),
-    SimpleBDXStore = (1 << 1),
-    Has20BitOffset = (1 << 2),
-    HasIndex       = (1 << 3),
-    Is128Bit       = (1 << 4),
-    AccessSizeMask = (31 << 5),
-    AccessSizeShift = 5
+    SimpleBDXLoad   = (1 << 0),
+    SimpleBDXStore  = (1 << 1),
+    Has20BitOffset  = (1 << 2),
+    HasIndex        = (1 << 3),
+    Is128Bit        = (1 << 4),
+    AccessSizeMask  = (31 << 5),
+    AccessSizeShift = 5,
+    CCValuesMask    = (15 << 10),
+    CCValuesShift   = 10,
+    CCHasZero       = (1 << 14),
+    CCHasOrder      = (1 << 15),
+    CCMaskFirst     = (1 << 16),
+    CCMaskLast      = (1 << 17),
+    IsLogical       = (1 << 18)
   };
   static inline unsigned getAccessSize(unsigned int Flags) {
     return (Flags & AccessSizeMask) >> AccessSizeShift;
   }
+  static inline unsigned getCCValues(unsigned int Flags) {
+    return (Flags & CCValuesMask) >> CCValuesShift; // 4-bit CCValues field
+  }
 
   // SystemZ MachineOperand target flags.
   enum {
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 341eb90..748539a 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -59,7 +59,7 @@
 // the first operand.  It seems friendlier to use mnemonic forms like
 // JE and JLH when writing out the assembly though.
 let isBranch = 1, isTerminator = 1, Uses = [CC] in {
-  let isCodeGenOnly = 1 in {
+  let isCodeGenOnly = 1, CCMaskFirst = 1 in {
     def BRC : InstRI<0xA74, (outs), (ins cond4:$valid, cond4:$R1,
                                          brtarget16:$I2), "j$R1\t$I2",
                      [(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>;
@@ -195,7 +195,7 @@
 
 // The definitions here are for the call-clobbered registers.
 let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
-                        F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D],
+                        F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC],
     R1 = 14, isCodeGenOnly = 1 in {
   def BRAS  : InstRI<0xA75, (outs), (ins pcrel16call:$I2, variable_ops),
                      "bras\t%r14, $I2", []>;
@@ -512,9 +512,12 @@
 //===----------------------------------------------------------------------===//
 
 let Defs = [CC] in {
-  def LCR   : UnaryRR <"lc",   0x13,   ineg,      GR32, GR32>;
-  def LCGR  : UnaryRRE<"lcg",  0xB903, ineg,      GR64, GR64>;
-  def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>;
+  let CCValues = 0xF, CCHasZero = 1 in {
+    def LCR  : UnaryRR <"lc",  0x13,   ineg, GR32, GR32>;
+    def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>;
+  }
+  let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in
+    def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>;
 }
 defm : SXU<ineg, LCGFR>;
 
@@ -566,7 +569,7 @@
 //===----------------------------------------------------------------------===//
 
 // Plain addition.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in {
   // Addition of a register.
   let isCommutable = 1 in {
     defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>;
@@ -637,7 +640,7 @@
 
 // Plain substraction.  Although immediate forms exist, we use the
 // add-immediate instruction instead.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in {
   // Subtraction of a register.
   defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>;
   def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>;
@@ -687,13 +690,14 @@
 
 let Defs = [CC] in {
   // ANDs of a register.
-  let isCommutable = 1 in {
+  let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
     defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>;
     defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>;
   }
 
   let isConvertibleToThreeAddress = 1 in {
     // ANDs of a 16-bit immediate, leaving other bits unaffected.
+    // The CC result only reflects the 16-bit field, not the full register.
     let isCodeGenOnly = 1 in {
       def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
       def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
@@ -704,15 +708,19 @@
     def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>;
 
     // ANDs of a 32-bit immediate, leaving other bits unaffected.
-    let isCodeGenOnly = 1 in
+    // The CC result only reflects the 32-bit field, which means we can
+    // use it as a zero indicator for i32 operations but not otherwise.
+    let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
       def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
     def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>;
     def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>;
   }
 
   // ANDs of memory.
-  defm N  : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
-  def  NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
+  let CCValues = 0xC, CCHasZero = 1 in {
+    defm N  : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
+    def  NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
+  }
 
   // AND to memory
   defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>;
@@ -726,12 +734,13 @@
 
 let Defs = [CC] in {
   // ORs of a register.
-  let isCommutable = 1 in {
+  let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
     defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>;
     defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>;
   }
 
   // ORs of a 16-bit immediate, leaving other bits unaffected.
+  // The CC result only reflects the 16-bit field, not the full register.
   let isCodeGenOnly = 1 in {
     def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
     def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
@@ -742,14 +751,18 @@
   def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>;
 
   // ORs of a 32-bit immediate, leaving other bits unaffected.
-  let isCodeGenOnly = 1 in
+  // The CC result only reflects the 32-bit field, which means we can
+  // use it as a zero indicator for i32 operations but not otherwise.
+  let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
     def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
   def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>;
   def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>;
 
   // ORs of memory.
-  defm O  : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
-  def  OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
+  let CCValues = 0xC, CCHasZero = 1 in {
+    defm O  : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
+    def  OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
+  }
 
   // OR to memory
   defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>;
@@ -763,20 +776,24 @@
 
 let Defs = [CC] in {
   // XORs of a register.
-  let isCommutable = 1 in {
+  let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
     defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>;
     defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>;
   }
 
   // XORs of a 32-bit immediate, leaving other bits unaffected.
-  let isCodeGenOnly = 1 in
+  // The CC result only reflects the 32-bit field, which means we can
+  // use it as a zero indicator for i32 operations but not otherwise.
+  let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
     def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
   def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>;
   def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>;
 
   // XORs of memory.
-  defm X  : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
-  def  XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
+  let CCValues = 0xC, CCHasZero = 1 in {
+    defm X  : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
+    def  XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
+  }
 
   // XOR to memory
   defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>;
@@ -849,7 +866,7 @@
 }
 
 // Arithmetic shift right.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in {
   defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
   def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>;
 }
@@ -862,11 +879,12 @@
 
 // Rotate second operand left and inserted selected bits into first operand.
 // These can act like 32-bit operands provided that the constant start and
-// end bits (operands 2 and 3) are in the range [32, 64)
+// end bits (operands 2 and 3) are in the range [32, 64).
 let Defs = [CC] in {
   let isCodeGenOnly = 1 in
-    def RISBG32 : RotateSelectRIEf<"risbg",  0xEC55, GR32, GR32>;
-  def RISBG : RotateSelectRIEf<"risbg",  0xEC55, GR64, GR64>;
+    def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
+  let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in
+    def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
 }
 
 // Forms of RISBG that only affect one word of the destination register.
@@ -880,7 +898,8 @@
              Requires<[FeatureHighWord]>;
 
 // Rotate second operand left and perform a logical operation with selected
-// bits of the first operand.
+// bits of the first operand.  The CC result only describes the selected bits,
+// so isn't useful for a full comparison against zero.
 let Defs = [CC] in {
   def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>;
   def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>;
@@ -892,7 +911,7 @@
 //===----------------------------------------------------------------------===//
 
 // Signed comparisons.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xE in {
   // Comparison with a register.
   def CR   : CompareRR <"c",   0x19,   z_cmp,     GR32, GR32>;
   def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>;
@@ -926,7 +945,7 @@
 defm : SXB<z_cmp, GR64, CGFR>;
 
 // Unsigned comparisons.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
   // Comparison with a register.
   def CLR   : CompareRR <"cl",   0x15,   z_ucmp,    GR32, GR32>;
   def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>;
diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
index 9b637c0..f0ea3e2 100644
--- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -7,18 +7,36 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass does two things:
-// (1) fuse compares and branches into COMPARE AND BRANCH instructions
-// (2) make sure that all branches are in range.
+// This pass does three things:
+// (1) try to remove compares if CC already contains the required information
+// (2) fuse compares and branches into COMPARE AND BRANCH instructions
+// (3) make sure that all branches are in range.
 //
-// We do (1) here rather than earlier because the fused form prevents
-// predication.
+// We do (1) here rather than earlier because some transformations can
+// change the set of available CC values and we generally want those
+// transformations to have priority over (1).  This is especially true in
+// the commonest case where the CC value is used by a single in-range branch
+// instruction, since (2) will then be able to fuse the compare and the
+// branch instead.
 //
-// Doing it so late makes it more likely that a register will be reused
+// For example, two-address NILF can sometimes be converted into
+// three-address RISBLG.  NILF produces a CC value that indicates whether
+// the low word is zero, but RISBLG does not modify CC at all.  On the
+// other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG.
+// The CC value produced by NILL isn't useful for our purposes, but the
+// value produced by RISBG can be used for any comparison with zero
+// (not just equality).  So there are some transformations that lose
+// CC values (while still being worthwhile) and others that happen to make
+// the CC result more useful than it was originally.
+//
+// We do (2) here rather than earlier because the fused form prevents
+// predication.  It also has to happen after (1).
+//
+// Doing (2) so late makes it more likely that a register will be reused
 // between the compare and the branch, but it isn't clear whether preventing
 // that would be a win or not.
 //
-// There are several ways in which (2) could be done.  One aggressive
+// There are several ways in which (3) could be done.  One aggressive
 // approach is to assume that all branches are in range and successively
 // replace those that turn out not to be in range with a longer form
 // (branch relaxation).  A simple implementation is to continually walk
@@ -156,6 +174,7 @@
     void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
                         bool AssumeRelaxed);
     TerminatorInfo describeTerminator(MachineInstr *MI);
+    bool optimizeCompareZero(MachineInstr *PrevCCSetter, MachineInstr *Compare);
     bool fuseCompareAndBranch(MachineInstr *Compare);
     uint64_t initMBBInfo();
     bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
@@ -254,6 +273,15 @@
   return Terminator;
 }
 
+// Return true if CC is live into at least one successor of MBB.
+static bool isCCLiveOut(MachineBasicBlock *MBB) {
+  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+         SE = MBB->succ_end(); SI != SE; ++SI)
+    if ((*SI)->isLiveIn(SystemZ::CC))
+      return true;
+  return false;
+}
+
 // Return true if CC is live after MBBI.
 static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
                           const TargetRegisterInfo *TRI) {
@@ -269,12 +297,130 @@
       return false;
   }
 
-  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
-         SE = MBB->succ_end(); SI != SE; ++SI)
-    if ((*SI)->isLiveIn(SystemZ::CC))
-      return true;
+  return isCCLiveOut(MBB);
+}
 
-  return false;
+// Return true if all uses of the CC value produced by MBBI could make do
+// with the CC values in ReusableCCMask.  Append the "CC valid" and "CC mask"
+// operands of each such use to AlterMasks (only meaningful on success).
+static bool canRestrictCCMask(MachineBasicBlock::iterator MBBI,
+                              unsigned ReusableCCMask,
+                              SmallVectorImpl<MachineOperand *> &AlterMasks,
+                              const TargetRegisterInfo *TRI) {
+  MachineBasicBlock *MBB = MBBI->getParent();
+  MachineBasicBlock::iterator MBBE = MBB->end();
+  for (++MBBI; MBBI != MBBE; ++MBBI) {
+    if (MBBI->readsRegister(SystemZ::CC, TRI)) {
+      // Fail if TSFlags doesn't say where the CC valid/mask operands are.
+      unsigned MBBIFlags = MBBI->getDesc().TSFlags;
+      unsigned FirstOpNum;
+      if (MBBIFlags & SystemZII::CCMaskFirst)
+        FirstOpNum = 0;
+      else if (MBBIFlags & SystemZII::CCMaskLast)
+        FirstOpNum = MBBI->getNumExplicitOperands() - 2;
+      else
+        return false;
+
+      // Check whether the instruction predicate treats all CC values
+      // outside of ReusableCCMask in the same way.  In that case it
+      // doesn't matter what those CC values mean.
+      unsigned CCValid = MBBI->getOperand(FirstOpNum).getImm();
+      unsigned CCMask = MBBI->getOperand(FirstOpNum + 1).getImm();
+      unsigned OutValid = ~ReusableCCMask & CCValid;
+      unsigned OutMask = ~ReusableCCMask & CCMask;
+      if (OutMask != 0 && OutMask != OutValid)
+        return false;
+
+      AlterMasks.push_back(&MBBI->getOperand(FirstOpNum));
+      AlterMasks.push_back(&MBBI->getOperand(FirstOpNum + 1));
+
+      // Succeed if this was the final use of the CC value.
+      if (MBBI->killsRegister(SystemZ::CC, TRI))
+        return true;
+    }
+    // Succeed if the instruction redefines CC.
+    if (MBBI->definesRegister(SystemZ::CC, TRI))
+      return true;
+  }
+  // End of block reached: fail if CC is live into a successor block.
+  return !isCCLiveOut(MBB);
+}
+
+// Try to make Compare redundant with PrevCCSetter, the previous setter of CC,
+// by looking for cases where Compare compares the result of PrevCCSetter
+// against zero.  Return true on success, in which case the later CC users
+// have had their masks rewritten and Compare can be deleted.
+bool SystemZLongBranch::optimizeCompareZero(MachineInstr *PrevCCSetter,
+                                            MachineInstr *Compare) {
+  if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
+    return false;
+
+  // Check whether this is a comparison against zero.
+  if (Compare->getNumExplicitOperands() != 2 ||
+      !Compare->getOperand(1).isImm() ||
+      Compare->getOperand(1).getImm() != 0)
+    return false;
+
+  // See which compare-style condition codes are available after PrevCCSetter.
+  unsigned PrevFlags = PrevCCSetter->getDesc().TSFlags;
+  unsigned ReusableCCMask = 0;
+  if (PrevFlags & SystemZII::CCHasZero)
+    ReusableCCMask |= SystemZ::CCMASK_CMP_EQ;
+
+  // For unsigned comparisons with zero, only equality makes sense.
+  unsigned CompareFlags = Compare->getDesc().TSFlags;
+  if (!(CompareFlags & SystemZII::IsLogical) &&
+      (PrevFlags & SystemZII::CCHasOrder))
+    ReusableCCMask |= SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_GT;
+
+  if (ReusableCCMask == 0)
+    return false;
+
+  // Make sure that PrevCCSetter sets the value being compared.
+  unsigned SrcReg = Compare->getOperand(0).getReg();
+  unsigned SrcSubReg = Compare->getOperand(0).getSubReg();
+  if (!PrevCCSetter->getOperand(0).isReg() ||
+      !PrevCCSetter->getOperand(0).isDef() ||
+      PrevCCSetter->getOperand(0).getReg() != SrcReg ||
+      PrevCCSetter->getOperand(0).getSubReg() != SrcSubReg)
+    return false;
+
+  // Make sure that nothing between PrevCCSetter and Compare modifies SrcReg.
+  MachineBasicBlock::iterator MBBI = PrevCCSetter, MBBE = Compare;
+  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+  for (++MBBI; MBBI != MBBE; ++MBBI)
+    if (MBBI->modifiesRegister(SrcReg, TRI))
+      return false;
+
+  // See whether all uses of Compare's CC value could make do with
+  // the values produced by PrevCCSetter.
+  SmallVector<MachineOperand *, 4> AlterMasks;
+  if (!canRestrictCCMask(Compare, ReusableCCMask, AlterMasks, TRI))
+    return false;
+
+  // Rewrite every CC valid/mask operand pair found by canRestrictCCMask.
+  unsigned CCValues = SystemZII::getCCValues(PrevFlags);
+  assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues");
+  for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) {
+    AlterMasks[I]->setImm(CCValues);
+    unsigned CCMask = AlterMasks[I + 1]->getImm();
+    if (CCMask & ~ReusableCCMask)
+      AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) |
+                                (CCValues & ~ReusableCCMask));
+  }
+
+  // CC is now live after PrevCCSetter.
+  int CCDef = PrevCCSetter->findRegisterDefOperandIdx(SystemZ::CC, false,
+                                                      true, TRI);
+  assert(CCDef >= 0 && "Couldn't find CC set");
+  PrevCCSetter->getOperand(CCDef).setIsDead(false);
+
+  // Clear any intervening kills of CC.
+  MBBI = PrevCCSetter;
+  for (++MBBI; MBBI != MBBE; ++MBBI)
+    MBBI->clearRegisterKills(SystemZ::CC, TRI);
+
+  return true;
 }
 
 // Try to fuse compare instruction Compare into a later branch.  Return
@@ -345,6 +491,8 @@
 // that no branches need relaxation.  Return the size of the function under
 // this assumption.
 uint64_t SystemZLongBranch::initMBBInfo() {
+  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+
   MF->RenumberBlocks();
   unsigned NumBlocks = MF->size();
 
@@ -365,13 +513,20 @@
     // Calculate the size of the fixed part of the block.
     MachineBasicBlock::iterator MI = MBB->begin();
     MachineBasicBlock::iterator End = MBB->end();
+    MachineInstr *PrevCCSetter = 0;
     while (MI != End && !MI->isTerminator()) {
       MachineInstr *Current = MI;
       ++MI;
-      if (Current->isCompare() && fuseCompareAndBranch(Current))
-        Current->removeFromParent();
-      else
-        Block.Size += TII->getInstSizeInBytes(Current);
+      if (Current->isCompare()) {
+        if ((PrevCCSetter && optimizeCompareZero(PrevCCSetter, Current)) ||
+            fuseCompareAndBranch(Current)) {
+          Current->removeFromParent();
+          continue;
+        }
+      }
+      if (Current->modifiesRegister(SystemZ::CC, TRI))
+        PrevCCSetter = Current;
+      Block.Size += TII->getInstSizeInBytes(Current);
     }
     skipNonTerminators(Position, Block);