AMDGPU/GlobalISel: legalize and select 32-bit G_ASHR

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, rovka, kristof.beyls, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D48196

llvm-svn: 335318
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index a5406ae..fd1b293 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -18,6 +18,11 @@
     GIComplexOperandMatcher<s32, "selectVSRC0">,
     GIComplexPatternEquiv<sd_vsrc0>;
 
+def sd_vcsrc : ComplexPattern<i32, 1, "">;
+def gi_vcsrc :
+    GIComplexOperandMatcher<s32, "selectVCSRC">,
+    GIComplexPatternEquiv<sd_vcsrc>;
+
 def gi_vop3mods0 :
     GIComplexOperandMatcher<s32, "selectVOP3Mods0">,
     GIComplexPatternEquiv<VOP3Mods0>;
@@ -60,6 +65,26 @@
   (inst src0_vt:$src0, src1_vt:$src1)
 >;
 
+class GISelVop3Pat2 <
+  SDPatternOperator node,
+  Instruction inst,
+  ValueType dst_vt,
+  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt>   : GCNPat <
+
+  (dst_vt (node (src0_vt (sd_vcsrc src0_vt:$src0)), (src1_vt (sd_vcsrc src1_vt:$src1)))),
+  (inst src0_vt:$src0, src1_vt:$src1)
+>;
+
+class GISelVop3Pat2CommutePat <
+  SDPatternOperator node,
+  Instruction inst,
+  ValueType dst_vt,
+  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt>   : GCNPat <
+
+  (dst_vt (node (src0_vt (sd_vcsrc src0_vt:$src0)), (src1_vt (sd_vcsrc src1_vt:$src1)))),
+  (inst src0_vt:$src1, src1_vt:$src0)
+>;
+
 multiclass GISelVop2IntrPat <
   SDPatternOperator node, Instruction inst,
   ValueType dst_vt, ValueType src_vt = dst_vt> {
@@ -76,6 +101,15 @@
 def : GISelSop2Pat <or, S_OR_B32, i32>;
 def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
 
+def : GISelSop2Pat <sra, S_ASHR_I32, i32>;
+let AddedComplexity = 100 in {
+let SubtargetPredicate = isSICI in {
+def : GISelVop2Pat <sra, V_ASHR_I32_e32, i32>;
+}
+def : GISelVop2CommutePat <sra, V_ASHRREV_I32_e32, i32>;
+}
+def : GISelVop3Pat2CommutePat <sra, V_ASHRREV_I32_e64, i32>;
+
 // FIXME: Select directly to _e32 so we don't need to deal with modifiers.
 // FIXME: We can't re-use SelectionDAG patterns here because they match
 // against a custom SDNode and we would need to create a generic machine
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f98f57e..80a1bc9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -537,6 +537,7 @@
   switch (I.getOpcode()) {
   default:
     break;
+  case TargetOpcode::G_ASHR:
   case TargetOpcode::G_SITOFP:
   case TargetOpcode::G_FMUL:
   case TargetOpcode::G_FADD:
@@ -564,6 +565,14 @@
   return false;
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
+  }};
+
+}
+
 ///
 /// This will select either an SGPR or VGPR operand and will save us from
 /// having to write an extra tablegen pattern.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 8283ab5..fee1041 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -73,6 +73,9 @@
   bool selectG_STORE(MachineInstr &I) const;
 
   InstructionSelector::ComplexRendererFns
+  selectVCSRC(MachineOperand &Root) const;
+
+  InstructionSelector::ComplexRendererFns
   selectVSRC0(MachineOperand &Root) const;
 
   InstructionSelector::ComplexRendererFns
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index c429833..ecfa201 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -55,6 +55,7 @@
   };
 
   setAction({G_ADD, S32}, Legal);
+  setAction({G_ASHR, S32}, Legal);
   setAction({G_SUB, S32}, Legal);
   setAction({G_MUL, S32}, Legal);
   setAction({G_AND, S32}, Legal);