AMDGPU/GlobalISel: Select G_FABS/G_FNEG
f64 doesn't work yet because tablegen currently doesn't handlde
REG_SEQUENCE.
This does regress some multi use VALU fneg cases since now the
immediate remains in an SGPR, and more moves are used for legalizing
the xor. This is a SIFixSGPRCopies deficiency.
llvm-svn: 371540
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index aa65fb6..9ea4a81 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -313,7 +313,7 @@
auto &FPOpActions = getActionDefinitionsBuilder(
- { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
+ { G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE})
.legalFor({S32, S64});
auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS})
.customFor({S32, S64});
@@ -345,9 +345,6 @@
.scalarize(0);
}
- // TODO: Implement
- getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
-
if (ST.hasVOP3PInsts())
FPOpActions.clampMaxNumElements(0, S16, 2);
@@ -359,6 +356,15 @@
.scalarize(0)
.clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
+ getActionDefinitionsBuilder({G_FNEG, G_FABS})
+ .legalFor(FPTypesPK16)
+ .clampMaxNumElements(0, S16, 2)
+ .scalarize(0)
+ .clampScalar(0, S16, S64);
+
+ // TODO: Implement
+ getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
+
if (ST.has16BitInsts()) {
getActionDefinitionsBuilder(G_FSQRT)
.legalFor({S32, S64, S16})