PTX: Fix various codegen issues

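- Emit mad instead of mad.rn for shader model 1.0
- Emit explicit mov.u32 instructions for reading global variables
  (most PTX instructions cannot take global variable immediates)
- Compute the byte size of nested (multi-dimensional) global arrays from
  the product of all dimensions, not just the outermost one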

llvm-svn: 127895
diff --git a/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
index dd3e895..27c9605 100644
--- a/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -313,13 +313,23 @@
     const ArrayType* arrayTy = dyn_cast<const ArrayType>(elementTy);
     elementTy = arrayTy->getElementType();
 
+    unsigned numElements = arrayTy->getNumElements();
+
+    while (elementTy->isArrayTy()) {
+
+      arrayTy = dyn_cast<const ArrayType>(elementTy);
+      elementTy = arrayTy->getElementType();
+
+      numElements *= arrayTy->getNumElements();
+    }
+
     // FIXME: isPrimitiveType() == false for i16?
     assert(elementTy->isSingleValueType() &&
            "Non-primitive types are not handled");
 
     // Compute the size of the array, in bytes.
     uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3)
-                         * arrayTy->getNumElements();
+                       * numElements;
 
     decl += ".b8 ";
     decl += gvsym->getName();
diff --git a/llvm/lib/Target/PTX/PTXISelLowering.cpp b/llvm/lib/Target/PTX/PTXISelLowering.cpp
index 159a27a..1a23bc2 100644
--- a/llvm/lib/Target/PTX/PTXISelLowering.cpp
+++ b/llvm/lib/Target/PTX/PTXISelLowering.cpp
@@ -64,6 +64,8 @@
   switch (Opcode) {
     default:
       llvm_unreachable("Unknown opcode");
+    case PTXISD::COPY_ADDRESS:
+      return "PTXISD::COPY_ADDRESS";
     case PTXISD::READ_PARAM:
       return "PTXISD::READ_PARAM";
     case PTXISD::EXIT:
@@ -82,7 +84,14 @@
   EVT PtrVT = getPointerTy();
   DebugLoc dl = Op.getDebugLoc();
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  return DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+
+  SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+  SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS,
+                                 dl,
+                                 MVT::i32,
+                                 targetGlobal);
+
+  return movInstr;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/PTX/PTXISelLowering.h b/llvm/lib/Target/PTX/PTXISelLowering.h
index b03a9f6..c69c416 100644
--- a/llvm/lib/Target/PTX/PTXISelLowering.h
+++ b/llvm/lib/Target/PTX/PTXISelLowering.h
@@ -26,7 +26,8 @@
     FIRST_NUMBER = ISD::BUILTIN_OP_END,
     READ_PARAM,
     EXIT,
-    RET
+    RET,
+    COPY_ADDRESS
   };
 } // namespace PTXISD
 
diff --git a/llvm/lib/Target/PTX/PTXInstrInfo.td b/llvm/lib/Target/PTX/PTXInstrInfo.td
index 42671b6..95768ac 100644
--- a/llvm/lib/Target/PTX/PTXInstrInfo.td
+++ b/llvm/lib/Target/PTX/PTXInstrInfo.td
@@ -175,6 +175,8 @@
   : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
 def PTXret
   : SDNode<"PTXISD::RET",  SDTNone, [SDNPHasChain]>;
+def PTXcopyaddress
+  : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
 
 //===----------------------------------------------------------------------===//
 // Instruction Class Templates
@@ -441,7 +443,8 @@
 // In the short term, mad is supported on all PTX versions and we use a
 // default rounding mode no matter what shader model or PTX version.
 // TODO: Allow the rounding mode to be selectable through llc.
-defm FMAD : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>;
+defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13]>;
+defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13]>;
 
 ///===- Floating-Point Intrinsic Instructions -----------------------------===//
 
@@ -533,6 +536,12 @@
               [(set RRegf64:$d, fpimm:$a)]>;
 }
 
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+  def MOVaddr
+    : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+              [(set RRegu32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+}
+
 // Loads
 defm LDg : PTX_LD_ALL<"ld.global", load_global>;
 defm LDc : PTX_LD_ALL<"ld.const",  load_constant>;