AMDGPU: Fold v_lshl_or_b32 with 0 src0

This pattern appears from the expansion of some packed operations.
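
The instruction computes (src0 << src1) | src2, so a zero src0 reduces
the result to src2: the operation can then be rewritten as a COPY of
src2, or as a v_mov_b32 when src2 is an immediate.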

llvm-svn: 339025
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 338cabc..e2ba288 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -550,6 +550,21 @@
   if (!Src0->isImm() && !Src1->isImm())
     return false;
 
+  if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
+    if (Src0->isImm() && Src0->getImm() == 0) {
+      // v_lshl_or_b32 0, X, Y -> copy Y
+      // v_lshl_or_b32 0, X, K -> v_mov_b32 K
+      bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
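+      // Remove src1 before src0 so that Src0Idx stays valid; the remaining
+      // src2 becomes the single source operand of the COPY / V_MOV_B32.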
+      MI->RemoveOperand(Src1Idx);
+      MI->RemoveOperand(Src0Idx);
+
+      MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
+      return true;
+    }
+  }
+
   // and k0, k1 -> v_mov_b32 (k0 & k1)
   // or k0, k1 -> v_mov_b32 (k0 | k1)
   // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
index 11d7612..82a15b3 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination -o - %s | FileCheck -check-prefix=GCN %s
 ...
 
 # GCN-LABEL: name: s_fold_and_imm_regimm_32{{$}}
@@ -831,3 +831,75 @@
     S_ENDPGM implicit $vcc
 
 ...
+---
+# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_reg{{$}}
+# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit %2
+
+name: constant_fold_lshl_or_reg0_immreg_reg
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+  %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+  %2:vgpr_32 = V_LSHL_OR_B32 %0, %1, $vgpr0, implicit $exec
+  S_ENDPGM implicit %2
+
+...
+
+---
+
+# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_imm{{$}}
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit %2
+
+name: constant_fold_lshl_or_reg0_immreg_imm
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+
+  %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+  %2:vgpr_32 = V_LSHL_OR_B32 %0, %1, 10, implicit $exec
+  S_ENDPGM implicit %2
+
+...
+
+---
+
+# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}}
+# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit %3
+
+name: constant_fold_lshl_or_reg0_immreg_immreg
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+
+  %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+  %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
+  %3:vgpr_32 = V_LSHL_OR_B32 %0, %1, %2, implicit $exec
+  S_ENDPGM implicit %3
+
+...