AMDGPU: Select DS insts without m0 initialization
GFX9 stopped using m0 for most DS instructions. Select
a different instruction without the m0 use. I think this will
be less error-prone than trying to manually maintain m0
uses as needed.
llvm-svn: 319270
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 48bfc2d..840c7fb 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -112,7 +112,13 @@
static bool offsetsCanBeCombined(CombineInfo &CI);
bool findMatchingInst(CombineInfo &CI);
+
+ unsigned read2Opcode(unsigned EltSize) const;
+ unsigned read2ST64Opcode(unsigned EltSize) const;
MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI);
+
+ unsigned write2Opcode(unsigned EltSize) const;
+ unsigned write2ST64Opcode(unsigned EltSize) const;
MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI);
MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI);
MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI);
@@ -436,6 +442,20 @@
return false;
}
+unsigned SILoadStoreOptimizer::read2Opcode(unsigned EltSize) const { // Returns the DS_READ2 opcode used when merging a pair of DS reads.
+ if (STM->ldsRequiresM0Init()) // STM reports that LDS access still needs m0 initialized.
+ return (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;
+ return (EltSize == 4) ? AMDGPU::DS_READ2_B32_gfx9 : AMDGPU::DS_READ2_B64_gfx9; // _gfx9 forms; presumably the variants without the m0 use. Any EltSize other than 4 selects B64.
+}
+
+unsigned SILoadStoreOptimizer::read2ST64Opcode(unsigned EltSize) const { // Returns the DS_READ2ST64 opcode; chosen by the caller when CI.UseST64 is set.
+ if (STM->ldsRequiresM0Init()) // STM reports that LDS access still needs m0 initialized.
+ return (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
+ // Otherwise use the _gfx9 opcodes (presumably the forms without the m0 use); EltSize != 4 yields B64.
+ return (EltSize == 4) ?
+ AMDGPU::DS_READ2ST64_B32_gfx9 : AMDGPU::DS_READ2ST64_B64_gfx9;
+}
+
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
@@ -449,12 +469,8 @@
unsigned NewOffset0 = CI.Offset0;
unsigned NewOffset1 = CI.Offset1;
- unsigned Opc = (CI.EltSize == 4) ? AMDGPU::DS_READ2_B32
- : AMDGPU::DS_READ2_B64;
-
- if (CI.UseST64)
- Opc = (CI.EltSize == 4) ? AMDGPU::DS_READ2ST64_B32
- : AMDGPU::DS_READ2ST64_B64;
+ unsigned Opc = CI.UseST64 ?
+ read2ST64Opcode(CI.EltSize) : read2Opcode(CI.EltSize);
unsigned SubRegIdx0 = (CI.EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
unsigned SubRegIdx1 = (CI.EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
@@ -517,6 +533,20 @@
return Next;
}
+unsigned SILoadStoreOptimizer::write2Opcode(unsigned EltSize) const { // Returns the DS_WRITE2 opcode used when merging a pair of DS writes.
+ if (STM->ldsRequiresM0Init()) // STM reports that LDS access still needs m0 initialized.
+ return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;
+ return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32_gfx9 : AMDGPU::DS_WRITE2_B64_gfx9; // _gfx9 forms; presumably the variants without the m0 use. Any EltSize other than 4 selects B64.
+}
+
+unsigned SILoadStoreOptimizer::write2ST64Opcode(unsigned EltSize) const { // Returns the DS_WRITE2ST64 opcode; chosen by the caller when CI.UseST64 is set.
+ if (STM->ldsRequiresM0Init()) // STM reports that LDS access still needs m0 initialized.
+ return (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
+ // Otherwise use the _gfx9 opcodes (presumably the forms without the m0 use); EltSize != 4 yields B64.
+ return (EltSize == 4) ?
+ AMDGPU::DS_WRITE2ST64_B32_gfx9 : AMDGPU::DS_WRITE2ST64_B64_gfx9;
+}
+
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
@@ -530,12 +560,8 @@
unsigned NewOffset0 = CI.Offset0;
unsigned NewOffset1 = CI.Offset1;
- unsigned Opc = (CI.EltSize == 4) ? AMDGPU::DS_WRITE2_B32
- : AMDGPU::DS_WRITE2_B64;
-
- if (CI.UseST64)
- Opc = (CI.EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32
- : AMDGPU::DS_WRITE2ST64_B64;
+ unsigned Opc = CI.UseST64 ?
+ write2ST64Opcode(CI.EltSize) : write2Opcode(CI.EltSize);
if (NewOffset0 > NewOffset1) {
// Canonicalize the merged instruction so the smaller offset comes first.
@@ -786,9 +812,13 @@
CombineInfo CI;
CI.I = I;
unsigned Opc = MI.getOpcode();
- if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) {
+ if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64 ||
+ Opc == AMDGPU::DS_READ_B32_gfx9 || Opc == AMDGPU::DS_READ_B64_gfx9) {
+
CI.InstClass = DS_READ_WRITE;
- CI.EltSize = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
+ CI.EltSize =
+ (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8 : 4;
+
if (findMatchingInst(CI)) {
Modified = true;
I = mergeRead2Pair(CI);
@@ -797,10 +827,13 @@
}
continue;
- }
- if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) {
+ } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64 ||
+ Opc == AMDGPU::DS_WRITE_B32_gfx9 ||
+ Opc == AMDGPU::DS_WRITE_B64_gfx9) {
CI.InstClass = DS_READ_WRITE;
- CI.EltSize = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
+ CI.EltSize
+ = (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8 : 4;
+
if (findMatchingInst(CI)) {
Modified = true;
I = mergeWrite2Pair(CI);