[AMDGPU] Avoid predicated execution of the basic blocks containing scalar instructions. Differential revision: https://reviews.llvm.org/D38293 llvm-svn: 314828

commit: 46513965842fc117f3afb0c8ee2fb0fd329d78ae [log] [tgz]
author: Alexander Timofeev <Alexander.Timofeev@amd.com> Tue Oct 03 18:55:36 2017 +0000
committer: Alexander Timofeev <Alexander.Timofeev@amd.com> Tue Oct 03 18:55:36 2017 +0000
tree: be3f041b4a092a1a5ee8b5fb7c3dc3600fac7b96
parent: 6b1be121c04b52ae105beee5065fc4b582f13388 [diff] [blame]
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index ba346d2..9bd58c4 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp

@@ -132,6 +132,16 @@
           I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
         return true;
 
+      // The destination register of V_READFIRSTLANE/V_READLANE may be used as
+      // an operand by some SALU instruction. If the exec mask is zero, the
+      // vector instruction defining that register is not executed, and the
+      // scalar instruction will operate on undefined data. So, for
+      // V_READFIRSTLANE/V_READLANE we should avoid predicated execution.
+      if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) ||
+          (I->getOpcode() == AMDGPU::V_READLANE_B32)) {
+        return true;
+      }
+
       if (I->isInlineAsm()) {
         const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
         const char *AsmStr = I->getOperand(0).getSymbolName();
commit	46513965842fc117f3afb0c8ee2fb0fd329d78ae	[log] [tgz]
author	Alexander Timofeev <Alexander.Timofeev@amd.com>	Tue Oct 03 18:55:36 2017 +0000
committer	Alexander Timofeev <Alexander.Timofeev@amd.com>	Tue Oct 03 18:55:36 2017 +0000
tree	be3f041b4a092a1a5ee8b5fb7c3dc3600fac7b96
parent	6b1be121c04b52ae105beee5065fc4b582f13388 [diff] [blame]