AMDGPU: Fix verifier error from partially undef copy
In this situation:
%VGPR2<def> = BUFFER_LOAD_DWORD_OFFSET %SGPR8_SGPR9_SGPR10_SGPR11,
%VGPR7<def,tied3> = V_MAC_F32_e32 %VGPR0<undef>, %VGPR1<kill>, %VGPR7<kill,tied0>, %EXEC<imp-use>
%VGPR3_VGPR4_VGPR5_VGPR6<def> = COPY %VGPR0_VGPR1_VGPR2_VGPR3
%VGPR4<def> = COPY %VGPR2
The copy VGPR1 -> VGPR4 was flagged as an error for reading the undefined VGPR1,
even though VGPR4 is redefined immediately after this copy.
llvm-svn: 275635
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 61a3b6b..d171e21 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -390,7 +390,6 @@
unsigned Opcode;
ArrayRef<int16_t> SubIndices;
- bool Forward;
if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
@@ -474,10 +473,7 @@
llvm_unreachable("Can't copy register!");
}
- if (RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg))
- Forward = true;
- else
- Forward = false;
+ bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
unsigned SubIdx;
@@ -496,6 +492,8 @@
if (Idx == 0)
Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
+
+ Builder.addReg(SrcReg, RegState::Implicit);
}
}