Add ‘llvm.experimental.constrained.fma‘ Intrinsic.
Differential Revision: http://reviews.llvm.org/D36335
llvm-svn: 311629
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index d416621..49ded73 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -13021,6 +13021,41 @@
value operands and has the same type as the operands. The remainder has the
same sign as the dividend.
+'``llvm.experimental.constrained.fma``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare <type>
+ @llvm.experimental.constrained.fma(<type> <op1>, <type> <op2>, <type> <op3>,
+ metadata <rounding mode>,
+ metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.fma``' intrinsic returns the result of a
+fused-multiply-add operation on its operands.
+
+Arguments:
+""""""""""
+
+The first three arguments to the '``llvm.experimental.constrained.fma``'
+intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
+<t_vector>` of floating point values. All arguments must have identical types.
+
+The fourth and fifth arguments specify the rounding mode and exception behavior
+as described above.
+
+Semantics:
+""""""""""
+
+The result produced is the product of the first two operands added to the third
+operand computed with infinite precision, and then rounded to the target
+precision.
Constrained libm-equivalent Intrinsics
--------------------------------------
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index bc5d235..ffcf3fa 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -263,6 +263,7 @@
/// They are used to limit optimizations while the DAG is being
/// optimized.
STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM,
+ STRICT_FMA,
/// Constrained versions of libm-equivalent floating point intrinsics.
/// These will be lowered to the equivalent non-constrained pseudo-op
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index db42fb6..3f88ed2 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -623,13 +623,14 @@
/// Test if this node is a strict floating point pseudo-op.
bool isStrictFPOpcode() {
switch (NodeType) {
- default:
+ default:
return false;
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
case ISD::STRICT_FREM:
+ case ISD::STRICT_FMA:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FPOW:
case ISD::STRICT_FPOWI:
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index aad49bc..f58ee21 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -167,6 +167,7 @@
};
bool isUnaryOp() const;
+ bool isTernaryOp() const;
RoundingMode getRoundingMode() const;
ExceptionBehavior getExceptionBehavior() const;
@@ -178,6 +179,7 @@
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index e2cd40b..55b284a 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -490,6 +490,13 @@
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_fma : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+
// These intrinsics are sensitive to the rounding mode so we need constrained
// versions of each of them. When strict rounding and exception control are
// not required the non-constrained versions of these intrinsics should be
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7a208cb..e0825e8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -907,6 +907,7 @@
case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
@@ -1072,6 +1073,7 @@
}
break;
case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
case ISD::STRICT_FPOWI:
case ISD::STRICT_FSIN:
@@ -1240,7 +1242,7 @@
// If the index is dependent on the store we will introduce a cycle when
// creating the load (the load uses the index, and by replacing the chain
// we will make the index dependent on the load). Also, the store might be
- // dependent on the extractelement and introduce a cycle when creating
+ // dependent on the extractelement and introduce a cycle when creating
// the load.
if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
ST->hasPredecessor(Op.getNode()))
@@ -4065,6 +4067,10 @@
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
+ case ISD::STRICT_FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128));
break;
case ISD::FADD:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b415421..d00ecaf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6695,6 +6695,7 @@
unsigned OrigOpc = Node->getOpcode();
unsigned NewOpc;
bool IsUnary = false;
+ bool IsTernary = false;
switch (OrigOpc) {
default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
@@ -6703,6 +6704,7 @@
case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+ case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
@@ -6729,6 +6731,10 @@
SDNode *Res = nullptr;
if (IsUnary)
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
+ else if (IsTernary)
+ Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
+ Node->getOperand(2),
+ Node->getOperand(3)});
else
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
Node->getOperand(2) });
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fc441b8..1371f52 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5432,6 +5432,7 @@
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
@@ -5963,6 +5964,9 @@
case Intrinsic::experimental_constrained_frem:
Opcode = ISD::STRICT_FREM;
break;
+ case Intrinsic::experimental_constrained_fma:
+ Opcode = ISD::STRICT_FMA;
+ break;
case Intrinsic::experimental_constrained_sqrt:
Opcode = ISD::STRICT_FSQRT;
break;
@@ -6009,10 +6013,15 @@
SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result;
if (FPI.isUnaryOp())
- Result = DAG.getNode(Opcode, sdl, VTs,
+ Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)) });
+ else if (FPI.isTernaryOp())
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)),
+ getValue(FPI.getArgOperand(1)),
+ getValue(FPI.getArgOperand(2)) });
else
- Result = DAG.getNode(Opcode, sdl, VTs,
+ Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)),
getValue(FPI.getArgOperand(1)) });
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 8b12c55..67bd5b6 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -14,10 +14,10 @@
// are all subclasses of the CallInst class. Note that none of these classes
// has state or virtual methods, which is an important part of this gross/neat
// hack working.
-//
+//
// In some cases, arguments to intrinsics need to be generic and are defined as
// type pointer to empty struct { }*. To access the real item of interest the
-// cast instruction needs to be stripped away.
+// cast instruction needs to be stripped away.
//
//===----------------------------------------------------------------------===//
@@ -98,7 +98,7 @@
ConstrainedFPIntrinsic::RoundingMode
ConstrainedFPIntrinsic::getRoundingMode() const {
unsigned NumOperands = getNumArgOperands();
- Metadata *MD =
+ Metadata *MD =
dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 2))->getMetadata();
if (!MD || !isa<MDString>(MD))
return rmInvalid;
@@ -118,7 +118,7 @@
ConstrainedFPIntrinsic::ExceptionBehavior
ConstrainedFPIntrinsic::getExceptionBehavior() const {
unsigned NumOperands = getNumArgOperands();
- Metadata *MD =
+ Metadata *MD =
dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 1))->getMetadata();
if (!MD || !isa<MDString>(MD))
return ebInvalid;
@@ -132,7 +132,7 @@
bool ConstrainedFPIntrinsic::isUnaryOp() const {
switch (getIntrinsicID()) {
- default:
+ default:
return false;
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_sin:
@@ -147,3 +147,13 @@
return true;
}
}
+
+bool ConstrainedFPIntrinsic::isTernaryOp() const {
+ switch (getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::experimental_constrained_fma:
+ return true;
+ }
+}
+
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index c5cee49..0cd6dbb 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -3973,6 +3973,7 @@
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
@@ -4433,8 +4434,9 @@
void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
unsigned NumOperands = FPI.getNumArgOperands();
- Assert(((NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
- "invalid arguments for constrained FP intrinsic", &FPI);
+ Assert(((NumOperands == 5 && FPI.isTernaryOp()) ||
+ (NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
+ "invalid arguments for constrained FP intrinsic", &FPI);
Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-1)),
"invalid exception behavior argument", &FPI);
Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-2)),
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index 0f8d730..eae3955 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck --check-prefix=COMMON --check-prefix=NO-FMA --check-prefix=FMACALL64 --check-prefix=FMACALL32 %s
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck -check-prefix=COMMON --check-prefix=HAS-FMA --check-prefix=FMA64 --check-prefix=FMA32 %s
; Verify that constants aren't folded to inexact results when the rounding mode
; is unknown.
@@ -9,7 +10,7 @@
; }
;
; CHECK-LABEL: f1
-; CHECK: divsd
+; COMMON: divsd
define double @f1() {
entry:
%div = call double @llvm.experimental.constrained.fdiv.f64(
@@ -29,7 +30,7 @@
; }
;
; CHECK-LABEL: f2
-; CHECK: subsd
+; COMMON: subsd
define double @f2(double %a) {
entry:
%div = call double @llvm.experimental.constrained.fsub.f64(
@@ -50,9 +51,9 @@
; }
;
; CHECK-LABEL: f3:
-; CHECK: subsd
-; CHECK: mulsd
-; CHECK: subsd
+; COMMON: subsd
+; COMMON: mulsd
+; COMMON: subsd
define double @f3(double %a, double %b) {
entry:
%sub = call double @llvm.experimental.constrained.fsub.f64(
@@ -81,11 +82,11 @@
; return a;
; }
;
-;
+;
; CHECK-LABEL: f4:
-; CHECK: testl
-; CHECK: jle
-; CHECK: addsd
+; COMMON: testl
+; COMMON: jle
+; COMMON: addsd
define double @f4(i32 %n, double %a) {
entry:
%cmp = icmp sgt i32 %n, 0
@@ -105,7 +106,7 @@
; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f5
-; CHECK: sqrtsd
+; COMMON: sqrtsd
define double @f5() {
entry:
%result = call double @llvm.experimental.constrained.sqrt.f64(double 42.0,
@@ -116,7 +117,7 @@
; Verify that pow(42.1, 3.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f6
-; CHECK: pow
+; COMMON: pow
define double @f6() {
entry:
%result = call double @llvm.experimental.constrained.pow.f64(double 42.1,
@@ -128,7 +129,7 @@
; Verify that powi(42.1, 3) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f7
-; CHECK: powi
+; COMMON: powi
define double @f7() {
entry:
%result = call double @llvm.experimental.constrained.powi.f64(double 42.1,
@@ -140,7 +141,7 @@
; Verify that sin(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f8
-; CHECK: sin
+; COMMON: sin
define double @f8() {
entry:
%result = call double @llvm.experimental.constrained.sin.f64(double 42.0,
@@ -151,7 +152,7 @@
; Verify that cos(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f9
-; CHECK: cos
+; COMMON: cos
define double @f9() {
entry:
%result = call double @llvm.experimental.constrained.cos.f64(double 42.0,
@@ -162,7 +163,7 @@
; Verify that exp(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f10
-; CHECK: exp
+; COMMON: exp
define double @f10() {
entry:
%result = call double @llvm.experimental.constrained.exp.f64(double 42.0,
@@ -173,7 +174,7 @@
; Verify that exp2(42.1) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f11
-; CHECK: exp2
+; COMMON: exp2
define double @f11() {
entry:
%result = call double @llvm.experimental.constrained.exp2.f64(double 42.1,
@@ -184,7 +185,7 @@
; Verify that log(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f12
-; CHECK: log
+; COMMON: log
define double @f12() {
entry:
%result = call double @llvm.experimental.constrained.log.f64(double 42.0,
@@ -195,7 +196,7 @@
; Verify that log10(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f13
-; CHECK: log10
+; COMMON: log10
define double @f13() {
entry:
%result = call double @llvm.experimental.constrained.log10.f64(double 42.0,
@@ -206,7 +207,7 @@
; Verify that log2(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f14
-; CHECK: log2
+; COMMON: log2
define double @f14() {
entry:
%result = call double @llvm.experimental.constrained.log2.f64(double 42.0,
@@ -217,7 +218,8 @@
; Verify that rint(42.1) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f15
-; CHECK: rint
+; NO-FMA: rint
+; HAS-FMA: vroundsd
define double @f15() {
entry:
%result = call double @llvm.experimental.constrained.rint.f64(double 42.1,
@@ -229,7 +231,8 @@
; Verify that nearbyint(42.1) isn't simplified when the rounding mode is
; unknown.
; CHECK-LABEL: f16
-; CHECK: nearbyint
+; NO-FMA: nearbyint
+; HAS-FMA: vroundsd
define double @f16() {
entry:
%result = call double @llvm.experimental.constrained.nearbyint.f64(
@@ -239,6 +242,38 @@
ret double %result
}
+; Verify that fma(3.5) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f17
+; FMACALL32: jmp fmaf # TAILCALL
+; FMA32: vfmadd213ss
+define float @f17() {
+entry:
+ %result = call float @llvm.experimental.constrained.fma.f32(
+ float 3.5,
+ float 3.5,
+ float 3.5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %result
+}
+
+; Verify that fma(42.1) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f18
+; FMACALL64: jmp fma # TAILCALL
+; FMA64: vfmadd213sd
+define double @f18() {
+entry:
+ %result = call double @llvm.experimental.constrained.fma.f64(
+ double 42.1,
+ double 42.1,
+ double 42.1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %result
+}
+
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
@@ -256,3 +291,5 @@
declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll
index f21ba15..e1b253d 100644
--- a/llvm/test/Feature/fp-intrinsics.ll
+++ b/llvm/test/Feature/fp-intrinsics.ll
@@ -73,7 +73,7 @@
; return a;
; }
;
-;
+;
; CHECK-LABEL: @f4
; CHECK-NOT: select
; CHECK: br i1 %cmp
@@ -94,7 +94,6 @@
ret double %a.0
}
-
; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f5
; CHECK: call double @llvm.experimental.constrained.sqrt
@@ -231,6 +230,18 @@
ret double %result
}
+; Verify that fma(42.1) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f17
+; CHECK: call double @llvm.experimental.constrained.fma
+define double @f17() {
+entry:
+ %result = call double @llvm.experimental.constrained.fma.f64(double 42.1, double 42.1, double 42.1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %result
+}
+
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
@@ -248,3 +259,4 @@
declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)