ptx: add store instruction

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122652 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index 65386c8..13b1d77 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -66,6 +66,56 @@
   return false;
 }]>;
 
+def store_global
+  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::GLOBAL; // match global space only
+  return false; // no source Value, or it is not pointer-typed
+}]>;
+
+def store_constant // NOTE(review): PTX 'st' may not allow .const; confirm
+  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::CONSTANT; // match const space only
+  return false; // no source Value, or it is not pointer-typed
+}]>;
+
+def store_local
+  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::LOCAL; // match local space only
+  return false; // no source Value, or it is not pointer-typed
+}]>;
+
+def store_parameter
+  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::PARAMETER; // param space only
+  return false; // no source Value, or it is not pointer-typed
+}]>;
+
+def store_shared
+  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::SHARED; // match shared space only
+  return false; // no source Value, or it is not pointer-typed
+}]>;
+
 // Addressing modes.
 def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
 def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
@@ -145,6 +195,21 @@
                    [(set RC:$d, (pat_load ADDRii:$a))]>;
 }
 
+multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
+  def rr : InstPTX<(outs), // no outputs; address matched by ADDRrr
+                   (ins RC:$d, MEMrr:$a),
+                   !strconcat(opstr, ".%type\t[$a], $d"),
+                   [(pat_store RC:$d, ADDRrr:$a)]>;
+  def ri : InstPTX<(outs), // no outputs; address matched by ADDRri
+                   (ins RC:$d, MEMri:$a),
+                   !strconcat(opstr, ".%type\t[$a], $d"),
+                   [(pat_store RC:$d, ADDRri:$a)]>;
+  def ii : InstPTX<(outs), // no outputs; address matched by ADDRii
+                   (ins RC:$d, MEMii:$a),
+                   !strconcat(opstr, ".%type\t[$a], $d"),
+                   [(pat_store RC:$d, ADDRii:$a)]>;
+}
+
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
@@ -185,6 +250,12 @@
 defm LDp : PTX_LD<"ld.param",  RRegs32, load_parameter>;
 defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
 
+defm STg : PTX_ST<"st.global", RRegs32, store_global>;
+defm STc : PTX_ST<"st.const",  RRegs32, store_constant>; // TODO: valid in PTX?
+defm STl : PTX_ST<"st.local",  RRegs32, store_local>;
+defm STp : PTX_ST<"st.param",  RRegs32, store_parameter>;
+defm STs : PTX_ST<"st.shared", RRegs32, store_shared>;
+
 ///===- Control Flow Instructions -----------------------------------------===//
 
 let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
index bfeb5be..68b641b 100644
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -22,6 +22,8 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
+// NOTE: PTXMFInfoExtract must run after register allocation!
+
 namespace llvm {
   /// PTXMFInfoExtract - PTX specific code to extract of PTX machine
   /// function information for PTXAsmPrinter
@@ -50,22 +52,38 @@
   PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
-  DEBUG(dbgs() << "****** PTX FUNCTION LOCAL VAR REG DEF ******\n");
+  DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
 
-  unsigned reg_ret = MFI->retReg();
+  unsigned retreg = MFI->retReg();
+
+  DEBUG(dbgs()
+        << "PTX::NoRegister == " << PTX::NoRegister << "\n"
+        << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
+
+  DEBUG(for (unsigned reg = PTX::NoRegister + 1;
+             reg < PTX::NUM_TARGET_REGS; ++reg)
+          if (MRI.isPhysRegUsed(reg))
+            dbgs() << "Used Reg: " << reg << "\n";);
 
   // FIXME: This is a slow linear scanning
   for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
-    if (MRI.isPhysRegUsed(reg) && reg != reg_ret && !MFI->isArgReg(reg))
+    if (MRI.isPhysRegUsed(reg) && reg != retreg && !MFI->isArgReg(reg))
       MFI->addLocalVarReg(reg);
 
   // Notify MachineFunctionInfo that I've done adding local var reg
   MFI->doneAddLocalVar();
 
+  DEBUG(dbgs() << "Return Reg: " << retreg << "\n");
+
+  DEBUG(for (PTXMachineFunctionInfo::reg_iterator
+             i = MFI->argRegBegin(), e = MFI->argRegEnd();
+	     i != e; ++i)
+        dbgs() << "Arg Reg: " << *i << "\n";);
+
   DEBUG(for (PTXMachineFunctionInfo::reg_iterator
              i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
 	     i != e; ++i)
-        dbgs() << "Used Reg: " << *i << "\n";);
+        dbgs() << "Local Var Reg: " << *i << "\n";);
 
   return false;
 }
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index a041d07e..f3ba499 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -49,6 +49,12 @@
 bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
                                        CodeGenOpt::Level OptLevel) {
   PM.add(createPTXISelDag(*this, OptLevel));
+  return false;
+}
+
+bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
+                                       CodeGenOpt::Level OptLevel) {
+  // PTXMFInfoExtract must run after register allocation!
   PM.add(createPTXMFInfoExtract(*this, OptLevel));
   return false;
 }
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 327ac9f..7f0d282 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -50,6 +50,8 @@
 
     virtual bool addInstSelector(PassManagerBase &PM,
                                  CodeGenOpt::Level OptLevel);
+    virtual bool addPostRegAlloc(PassManagerBase &PM,
+                                 CodeGenOpt::Level OptLevel);
 }; // class PTXTargetMachine
 } // namespace llvm