AMDGPU/GlobalISel: Legalize sin/cos
llvm-svn: 370402
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 87d4e99..1562523 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -275,12 +275,16 @@
auto &FPOpActions = getActionDefinitionsBuilder(
{ G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
.legalFor({S32, S64});
+ auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS})
+ .customFor({S32, S64});
if (ST.has16BitInsts()) {
if (ST.hasVOP3PInsts())
FPOpActions.legalFor({S16, V2S16});
else
FPOpActions.legalFor({S16});
+
+ TrigActions.customFor({S16});
}
auto &MinNumMaxNum = getActionDefinitionsBuilder({
@@ -306,10 +310,15 @@
if (ST.hasVOP3PInsts())
FPOpActions.clampMaxNumElements(0, S16, 2);
+
FPOpActions
.scalarize(0)
.clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
+ TrigActions
+ .scalarize(0)
+ .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
+
if (ST.has16BitInsts()) {
getActionDefinitionsBuilder(G_FSQRT)
.legalFor({S32, S64, S16})
@@ -816,6 +825,9 @@
return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
case TargetOpcode::G_INSERT_VECTOR_ELT:
return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FCOS:
+ return legalizeSinCos(MI, MRI, MIRBuilder);
default:
return false;
}
@@ -1231,6 +1243,35 @@
return true;
}
+// Custom legalization for G_FSIN / G_FCOS. The source operand is multiplied
+// by 1/(2*pi) and the product is fed to the amdgcn.sin / amdgcn.cos
+// intrinsic, which defines the original destination register. When the
+// subtarget reports hasTrigReducedRange(), the product is first passed
+// through amdgcn.fract. The flags of MI are propagated to the multiply and
+// to the intrinsics built here, and MI itself is erased. Always returns true.
+bool AMDGPULegalizerInfo::legalizeSinCos(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B) const {
+  B.setInstr(MI);
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  LLT Ty = MRI.getType(Dst);
+  unsigned Flags = MI.getFlags();
+
+  // Both paths start from Src * 1/(2*pi).
+  auto InvTwoPi = B.buildFConstant(Ty, 0.5 / M_PI);
+  Register Scaled = B.buildFMul(Ty, Src, InvTwoPi, Flags).getReg(0);
+
+  // Optionally take the fractional part of the scaled value before handing it
+  // to the trig intrinsic.
+  Register TrigInput = Scaled;
+  if (ST.hasTrigReducedRange()) {
+    TrigInput = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty}, false)
+      .addUse(Scaled)
+      .setMIFlags(Flags).getReg(0);
+  }
+
+  // Pick the intrinsic matching the opcode being legalized.
+  Intrinsic::ID TrigIntrin = Intrinsic::amdgcn_cos;
+  if (MI.getOpcode() == AMDGPU::G_FSIN)
+    TrigIntrin = Intrinsic::amdgcn_sin;
+
+  B.buildIntrinsic(TrigIntrin, makeArrayRef<Register>(Dst), false)
+    .addUse(TrigInput)
+    .setMIFlags(Flags);
+  MI.eraseFromParent();
+  return true;
+}
+
// Return the use branch instruction, otherwise null if the usage is invalid.
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI) {