AMDGPU: VALU carry-in and v_cndmask condition cannot be EXEC

The hardware will only forward EXEC_LO; the high 32 bits will be zero.

Additionally, inline constants do not work. At least,

   v_addc_u32_e64 v0, vcc, v0, v1, -1

which could conceivably be used to combine (v0 + v1 + 1) into a single
instruction, acts as if all carry-in bits are zero.

The llvm.amdgcn.ps.live test is adjusted; it would be nice to combine

   s_mov_b64 s[0:1], exec
   v_cndmask_b32_e64 v0, v1, v2, s[0:1]

into

   v_mov_b32 v0, v3

but it's not particularly high priority.

Fixes dEQP-GLES31.functional.shaders.helper_invocation.value.*

llvm-svn: 314522
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6a751d7..3a125c2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3012,15 +3012,18 @@
 
     unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    unsigned SrcCondCopy = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
 
+    BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
+      .addReg(SrcCond);
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
       .addReg(Src0, 0, AMDGPU::sub0)
       .addReg(Src1, 0, AMDGPU::sub0)
-      .addReg(SrcCond);
+      .addReg(SrcCondCopy);
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
       .addReg(Src0, 0, AMDGPU::sub1)
       .addReg(Src1, 0, AMDGPU::sub1)
-      .addReg(SrcCond);
+      .addReg(SrcCondCopy);
 
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst)
       .addReg(DstLo)