[SystemZ] Support conditional indirect sibling calls via BCR

This adds a conditional variant of CallBR instruction, CallBCR. Also,
it can be fused with integer comparisons, resulting in one of the new
C*BCall instructions.

In addition to CallBRCL limitations, this has another one: it won't
trigger if the function to call isn't already in %r1 - see f22 in the
test for an example (it's also why the loads in tests are volatile).

Author: koriakin
Differential Revision: http://reviews.llvm.org/D18928

llvm-svn: 265933
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index f975bcc..9d8fb8c 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -216,6 +216,85 @@
     LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D);
     break;
 
+  case SystemZ::CallBCR:
+    LoweredMI = MCInstBuilder(SystemZ::BCR)
+      .addImm(MI->getOperand(0).getImm())
+      .addImm(MI->getOperand(1).getImm())
+      .addReg(SystemZ::R1D);
+    break;
+
+  case SystemZ::CRBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CRB)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CGRBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CGRB)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CIBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CIB)
+      .addReg(MI->getOperand(0).getReg())
+      .addImm(MI->getOperand(1).getImm())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CGIBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CGIB)
+      .addReg(MI->getOperand(0).getReg())
+      .addImm(MI->getOperand(1).getImm())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CLRBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CLRB)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CLGRBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CLGRB)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CLIBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CLIB)
+      .addReg(MI->getOperand(0).getReg())
+      .addImm(MI->getOperand(1).getImm())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
+  case SystemZ::CLGIBCall:
+    LoweredMI = MCInstBuilder(SystemZ::CLGIB)
+      .addReg(MI->getOperand(0).getReg())
+      .addImm(MI->getOperand(1).getImm())
+      .addImm(MI->getOperand(2).getImm())
+      .addReg(SystemZ::R1D)
+      .addImm(0);
+    break;
+
   case SystemZ::TLS_GDCALL:
     LoweredMI = MCInstBuilder(SystemZ::BRASL)
       .addReg(SystemZ::R14D)
diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 59a3670..247cd18 100644
--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -392,6 +392,9 @@
   case SystemZ::CondReturn:
     Type = SystemZII::CompareAndReturn;
     break;
+  case SystemZ::CallBCR:
+    Type = SystemZII::CompareAndSibcall;
+    break;
   default:
     return false;
   }
@@ -412,19 +415,24 @@
         (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
       return false;
 
-  // Read the branch mask and target (if applicable).
+  // Read the branch mask, target (if applicable), regmask (if applicable).
   MachineOperand CCMask(MBBI->getOperand(1));
   assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
          "Invalid condition-code mask for integer comparison");
   // This is only valid for CompareAndBranch.
   MachineOperand Target(MBBI->getOperand(
     Type == SystemZII::CompareAndBranch ? 2 : 0));
+  const uint32_t *RegMask;
+  if (Type == SystemZII::CompareAndSibcall)
+    RegMask = MBBI->getOperand(2).getRegMask();
 
   // Clear out all current operands.
   int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
   assert(CCUse >= 0 && "BRC/BCR must use CC");
   Branch->RemoveOperand(CCUse);
-  if (Type == SystemZII::CompareAndBranch)
+  // Remove target (branch) or regmask (sibcall).
+  if (Type == SystemZII::CompareAndBranch ||
+      Type == SystemZII::CompareAndSibcall)
     Branch->RemoveOperand(2);
   Branch->RemoveOperand(1);
   Branch->RemoveOperand(0);
@@ -444,6 +452,9 @@
        .addReg(SystemZ::CC, RegState::ImplicitDefine);
   }
 
+  if (Type == SystemZII::CompareAndSibcall)
+    MIB.addRegMask(RegMask);
+
   // Clear any intervening kills of SrcReg and SrcReg2.
   MBBI = Compare;
   for (++MBBI; MBBI != MBBE; ++MBBI) {
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 8bb548e..79bd0ef 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -511,7 +511,8 @@
   if (STI.hasLoadStoreOnCond() && getConditionalMove(Opcode))
     return true;
   if (Opcode == SystemZ::Return ||
-      Opcode == SystemZ::CallJG)
+      Opcode == SystemZ::CallJG ||
+      Opcode == SystemZ::CallBR)
     return true;
   return false;
 }
@@ -585,6 +586,16 @@
       .addReg(SystemZ::CC, RegState::Implicit);
     return true;
   }
+  if (Opcode == SystemZ::CallBR) {
+    const uint32_t *RegMask = MI.getOperand(0).getRegMask();
+    MI.RemoveOperand(0);
+    MI.setDesc(get(SystemZ::CallBCR));
+    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+      .addImm(CCValid).addImm(CCMask)
+      .addRegMask(RegMask)
+      .addReg(SystemZ::CC, RegState::Implicit);
+    return true;
+  }
   return false;
 }
 
@@ -1348,6 +1359,27 @@
     default:
       return 0;
     }
+  case SystemZII::CompareAndSibcall:
+    switch (Opcode) {
+    case SystemZ::CR:
+      return SystemZ::CRBCall;
+    case SystemZ::CGR:
+      return SystemZ::CGRBCall;
+    case SystemZ::CHI:
+      return SystemZ::CIBCall;
+    case SystemZ::CGHI:
+      return SystemZ::CGIBCall;
+    case SystemZ::CLR:
+      return SystemZ::CLRBCall;
+    case SystemZ::CLGR:
+      return SystemZ::CLGRBCall;
+    case SystemZ::CLFI:
+      return SystemZ::CLIBCall;
+    case SystemZ::CLGFI:
+      return SystemZ::CLGIBCall;
+    default:
+      return 0;
+    }
   }
   return 0;
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index baa126f..3cb73ed 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -119,7 +119,10 @@
   CompareAndBranch,
 
   // Indirect branch, used for return - CRBReturn etc.
-  CompareAndReturn
+  CompareAndReturn,
+
+  // Indirect branch, used for sibcall - CRBCall etc.
+  CompareAndSibcall
 };
 } // end namespace SystemZII
 
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 101ca01..3628aa0 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -327,10 +327,26 @@
     def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
 }
 
-let CCMaskFirst = 1, isCall = 1, isReturn = 1 in
+let CCMaskFirst = 1, isCall = 1, isTerminator = 1, isReturn = 1 in {
   def CallBRCL : Alias<6, (outs), (ins cond4:$valid, cond4:$R1,
                                    pcrel32:$I2), []>;
 
+  let Uses = [R1D] in
+    def CallBCR : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
+}
+
+// Fused compare and conditional sibling calls.
+let isCall = 1, isTerminator = 1, isReturn = 1, Uses = [R1D] in {
+  def CRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+  def CGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+  def CIBCall : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>;
+  def CGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>;
+  def CLRBCall : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+  def CLGRBCall : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+  def CLIBCall : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>;
+  def CLGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>;
+}
+
 // TLS calls.  These will be lowered into a call to __tls_get_offset,
 // with an extra relocation specifying the TLS symbol.
 let isCall = 1, Defs = [R14D, CC] in {