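AMDGPU: Allow f32 types for llvm.amdgcn.s.buffer.load

Relax the load-type assertion in SIISelLowering.cpp so that it accepts
any i32 or f32 element type with a power-of-two element count, split
8- and 16-element results by element count rather than by exact type,
and add SMLoad_Pattern entries for f32, v2f32, v4f32, v8f32 and v16f32.

llvm-svn: 348625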
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a98183b..ff63c1f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4873,12 +4873,13 @@
SmallVector<SDValue, 4> Loads;
unsigned NumLoads = 1;
MVT LoadVT = VT.getSimpleVT();
+ MVT EltVT = LoadVT.isVector() ? LoadVT.getVectorElementType() : LoadVT;
+ unsigned NumElts = LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
+ assert((EltVT == MVT::i32 || EltVT == MVT::f32) &&
+ isPowerOf2_32(NumElts));
- assert(LoadVT == MVT::i32 || LoadVT == MVT::v2i32 || LoadVT == MVT::v4i32 ||
- LoadVT == MVT::v8i32 || LoadVT == MVT::v16i32);
-
- if (VT == MVT::v8i32 || VT == MVT::v16i32) {
- NumLoads = VT == MVT::v16i32 ? 4 : 2;
+ if (NumElts == 8 || NumElts == 16) {
+ NumLoads = NumElts == 16 ? 4 : 2;
LoadVT = MVT::v4i32;
}
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 8bd7de7..8a063e1 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -751,6 +751,12 @@
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16i32>;
+
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;
} // End let AddedComplexity = 100
let OtherPredicates = [isSICI] in {