[AMDGPU] use v32f32 for 3 mfma intrinsics
These should really use v32f32, but were defined as v32i32
due to the lack of the v32f32 type.
Differential Revision: https://reviews.llvm.org/D64667
llvm-svn: 365972
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 3eb1b1c..b90a0d2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -153,6 +153,7 @@
if (Subtarget->hasMAIInsts()) {
addRegisterClass(MVT::v32i32, &AMDGPU::AReg_1024RegClass);
+ addRegisterClass(MVT::v32f32, &AMDGPU::AReg_1024RegClass);
}
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -263,8 +264,9 @@
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
- for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
- MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v32i32 }) {
+ for (MVT VT : { MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
+ MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16,
+ MVT::v32i32, MVT::v32f32 }) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD: