Separately track input and output denormal mode
At least AMDGPU and x86 have separate controls for whether denormal
results are flushed on output, and for whether denormal inputs are
implicitly treated as 0. The current use in DAGCombiner only really
cares about the input treatment of denormals.
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index 6962b60..0a28ede 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -164,10 +164,10 @@
std::string FloatABI;
/// The floating-point denormal mode to use.
- llvm::DenormalMode FPDenormalMode = llvm::DenormalMode::Invalid;
+ llvm::DenormalMode FPDenormalMode;
/// The floating-point subnormal mode to use, for float.
- llvm::DenormalMode FP32DenormalMode = llvm::DenormalMode::Invalid;
+ llvm::DenormalMode FP32DenormalMode;
/// The float precision limit to use, if non-empty.
std::string LimitFloatPrecision;
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 53e00c1..09f1458 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -617,7 +617,7 @@
Action::OffloadKind DeviceOffloadKind,
const llvm::fltSemantics *FPType = nullptr) const {
// FIXME: This should be IEEE when default handling is fixed.
- return llvm::DenormalMode::Invalid;
+ return llvm::DenormalMode::getInvalid();
}
};
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 0aaf681..a34d3d8 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -247,7 +247,7 @@
if (!hasFP32Denormals)
TargetOpts.Features.push_back(
(Twine(hasFastFMAF() && hasFullRateDenormalsF32() &&
- CGOpts.FP32DenormalMode == llvm::DenormalMode::IEEE
+ CGOpts.FP32DenormalMode.Output == llvm::DenormalMode::IEEE
? '+' : '-') + Twine("fp32-denormals"))
.str());
// Always do not flush fp64 or fp16 denorms.
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 9ed2ccd..cdd3ca4 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1749,14 +1749,14 @@
FuncAttrs.addAttribute("null-pointer-is-valid", "true");
// TODO: Omit attribute when the default is IEEE.
- if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::Invalid)
+ if (CodeGenOpts.FPDenormalMode.isValid())
FuncAttrs.addAttribute("denormal-fp-math",
- llvm::denormalModeName(CodeGenOpts.FPDenormalMode));
-
- if (CodeGenOpts.FP32DenormalMode != llvm::DenormalMode::Invalid)
+ CodeGenOpts.FPDenormalMode.str());
+ if (CodeGenOpts.FP32DenormalMode.isValid()) {
FuncAttrs.addAttribute(
"denormal-fp-math-f32",
- llvm::denormalModeName(CodeGenOpts.FP32DenormalMode));
+ CodeGenOpts.FP32DenormalMode.str());
+ }
FuncAttrs.addAttribute("no-trapping-math",
llvm::toStringRef(CodeGenOpts.NoTrappingMath));
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 6a43b6b..4e73002 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -587,7 +587,7 @@
// floating point values to 0. (This corresponds to its "__CUDA_FTZ"
// property.)
getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
- CodeGenOpts.FP32DenormalMode !=
+ CodeGenOpts.FP32DenormalMode.Output !=
llvm::DenormalMode::IEEE);
}
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 68091b6..06e4686 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -108,14 +108,14 @@
const llvm::fltSemantics *FPType) const {
// Denormals should always be enabled for f16 and f64.
if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
- return llvm::DenormalMode::IEEE;
+ return llvm::DenormalMode::getIEEE();
if (DeviceOffloadKind == Action::OFK_Cuda) {
if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
options::OPT_fno_cuda_flush_denormals_to_zero,
false))
- return llvm::DenormalMode::PreserveSign;
+ return llvm::DenormalMode::getPreserveSign();
}
const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
@@ -134,7 +134,8 @@
bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
!DefaultDenormsAreZeroForTarget;
// Outputs are flushed to zero, preserving sign
- return DAZ ? llvm::DenormalMode::PreserveSign : llvm::DenormalMode::IEEE;
+ return DAZ ? llvm::DenormalMode::getPreserveSign() :
+ llvm::DenormalMode::getIEEE();
}
void AMDGPUToolChain::addClangTargetOptions(
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index aa599b0..6f092ca 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2641,7 +2641,7 @@
case options::OPT_fdenormal_fp_math_EQ:
DenormalFPMath = llvm::parseDenormalFPAttribute(A->getValue());
- if (DenormalFPMath == llvm::DenormalMode::Invalid) {
+ if (!DenormalFPMath.isValid()) {
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
}
@@ -2649,7 +2649,7 @@
case options::OPT_fdenormal_fp_math_f32_EQ:
DenormalFP32Math = llvm::parseDenormalFPAttribute(A->getValue());
- if (DenormalFP32Math == llvm::DenormalMode::Invalid) {
+ if (!DenormalFP32Math.isValid()) {
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
}
@@ -2768,7 +2768,7 @@
if (HonorINFs && HonorNaNs &&
!AssociativeMath && !ReciprocalMath &&
SignedZeros && TrappingMath && RoundingFPMath &&
- DenormalFPMath != llvm::DenormalMode::IEEE &&
+ DenormalFPMath != llvm::DenormalMode::getIEEE() &&
FPContract.empty())
// OK: Current Arg doesn't conflict with -ffp-model=strict
;
@@ -2816,14 +2816,18 @@
CmdArgs.push_back("-fno-trapping-math");
// TODO: Omit flag for the default IEEE instead
- if (DenormalFPMath != llvm::DenormalMode::Invalid) {
- CmdArgs.push_back(Args.MakeArgString(
- "-fdenormal-fp-math=" + llvm::denormalModeName(DenormalFPMath)));
+ if (DenormalFPMath.isValid()) {
+ llvm::SmallString<64> DenormFlag;
+ llvm::raw_svector_ostream ArgStr(DenormFlag);
+ ArgStr << "-fdenormal-fp-math=" << DenormalFPMath;
+ CmdArgs.push_back(Args.MakeArgString(ArgStr.str()));
}
- if (DenormalFP32Math != llvm::DenormalMode::Invalid) {
- CmdArgs.push_back(Args.MakeArgString(
- "-fdenormal-fp-math-f32=" + llvm::denormalModeName(DenormalFP32Math)));
+ if (DenormalFP32Math.isValid()) {
+ llvm::SmallString<64> DenormFlag;
+ llvm::raw_svector_ostream ArgStr(DenormFlag);
+ ArgStr << "-fdenormal-fp-math-f32=" << DenormalFP32Math;
+ CmdArgs.push_back(Args.MakeArgString(ArgStr.str()));
}
if (!FPContract.empty())
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 438d5e1..d605092 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -711,11 +711,11 @@
DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
options::OPT_fno_cuda_flush_denormals_to_zero,
false))
- return llvm::DenormalMode::PreserveSign;
+ return llvm::DenormalMode::getPreserveSign();
}
assert(DeviceOffloadKind != Action::OFK_Host);
- return llvm::DenormalMode::IEEE;
+ return llvm::DenormalMode::getIEEE();
}
bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 9f51c847..319f0d5 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1286,14 +1286,14 @@
if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_EQ)) {
StringRef Val = A->getValue();
Opts.FPDenormalMode = llvm::parseDenormalFPAttribute(Val);
- if (Opts.FPDenormalMode == llvm::DenormalMode::Invalid)
+ if (!Opts.FPDenormalMode.isValid())
Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val;
}
if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_f32_EQ)) {
StringRef Val = A->getValue();
Opts.FP32DenormalMode = llvm::parseDenormalFPAttribute(Val);
- if (Opts.FP32DenormalMode == llvm::DenormalMode::Invalid)
+ if (!Opts.FP32DenormalMode.isValid())
Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val;
}
diff --git a/clang/test/CodeGen/denormalfpmode.c b/clang/test/CodeGen/denormalfpmode.c
index b0013da..3b9ad0d 100644
--- a/clang/test/CodeGen/denormalfpmode.c
+++ b/clang/test/CodeGen/denormalfpmode.c
@@ -3,9 +3,9 @@
// RUN: %clang_cc1 -S -fdenormal-fp-math=positive-zero %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-PZ
// CHECK-LABEL: main
-// CHECK-IEEE: attributes #0 = {{.*}}"denormal-fp-math"="ieee"{{.*}}
-// CHECK-PS: attributes #0 = {{.*}}"denormal-fp-math"="preserve-sign"{{.*}}
-// CHECK-PZ: attributes #0 = {{.*}}"denormal-fp-math"="positive-zero"{{.*}}
+// CHECK-IEEE: attributes #0 = {{.*}}"denormal-fp-math"="ieee,ieee"{{.*}}
+// CHECK-PS: attributes #0 = {{.*}}"denormal-fp-math"="preserve-sign,preserve-sign"{{.*}}
+// CHECK-PZ: attributes #0 = {{.*}}"denormal-fp-math"="positive-zero,positive-zero"{{.*}}
int main() {
return 0;
diff --git a/clang/test/CodeGenCUDA/flush-denormals.cu b/clang/test/CodeGenCUDA/flush-denormals.cu
index 850c283..a372f3f 100644
--- a/clang/test/CodeGenCUDA/flush-denormals.cu
+++ b/clang/test/CodeGenCUDA/flush-denormals.cu
@@ -39,8 +39,8 @@
// CHECK-LABEL: define void @foo() #0
extern "C" __device__ void foo() {}
-// FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign"
-// NOFTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="ieee"
+// FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+// NOFTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="ieee,ieee"
// FIXME: This should be removed
diff --git a/clang/test/CodeGenCUDA/propagate-metadata.cu b/clang/test/CodeGenCUDA/propagate-metadata.cu
index 242e0d1..45f9319 100644
--- a/clang/test/CodeGenCUDA/propagate-metadata.cu
+++ b/clang/test/CodeGenCUDA/propagate-metadata.cu
@@ -61,8 +61,8 @@
// FTZ-NOT: "denormal-fp-math"
-// FTZ-SAME: "denormal-fp-math-f32"="preserve-sign"
-// NOFTZ-SAME: "denormal-fp-math-f32"="ieee"
+// FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"
// CHECK-SAME: "no-trapping-math"="true"
diff --git a/clang/test/Driver/cl-denorms-are-zero.cl b/clang/test/Driver/cl-denorms-are-zero.cl
index 23a5f78..7774c0d 100644
--- a/clang/test/Driver/cl-denorms-are-zero.cl
+++ b/clang/test/Driver/cl-denorms-are-zero.cl
@@ -14,7 +14,7 @@
// RUN: %clang -### -target amdgcn--amdhsa -c -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-DENORM %s
// RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -c -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
-// AMDGCN-FLUSH: "-fdenormal-fp-math-f32=preserve-sign"
+// AMDGCN-FLUSH: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
// This should be omitted and default to ieee
// AMDGCN-DENORM-NOT: "-fdenormal-fp-math-f32"
diff --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
index f0ab225..c032732 100644
--- a/clang/test/Driver/cuda-flush-denormals-to-zero.cu
+++ b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -9,5 +9,5 @@
// CPUFTZ-NOT: -fdenormal-fp-math
-// FTZ: "-fdenormal-fp-math-f32=preserve-sign"
-// NOFTZ: "-fdenormal-fp-math=ieee"
+// FTZ: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
+// NOFTZ: "-fdenormal-fp-math=ieee,ieee"
diff --git a/clang/test/Driver/denormal-fp-math.c b/clang/test/Driver/denormal-fp-math.c
index 5914c0b..af18517 100644
--- a/clang/test/Driver/denormal-fp-math.c
+++ b/clang/test/Driver/denormal-fp-math.c
@@ -3,10 +3,16 @@
// RUN: %clang -### -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=positive-zero -v 2>&1 | FileCheck -check-prefix=CHECK-PZ %s
// RUN: %clang -### -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=ieee -fno-fast-math -v 2>&1 | FileCheck -check-prefix=CHECK-NO-UNSAFE %s
// RUN: %clang -### -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=ieee -fno-unsafe-math-optimizations -v 2>&1 | FileCheck -check-prefix=CHECK-NO-UNSAFE %s
-// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID %s
+// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID0 %s
+// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=ieee,foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID1 %s
+// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,ieee -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID2 %s
+// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID3 %s
-// CHECK-IEEE: -fdenormal-fp-math=ieee
-// CHECK-PS: "-fdenormal-fp-math=preserve-sign"
-// CHECK-PZ: "-fdenormal-fp-math=positive-zero"
+// CHECK-IEEE: -fdenormal-fp-math=ieee,ieee
+// CHECK-PS: "-fdenormal-fp-math=preserve-sign,preserve-sign"
+// CHECK-PZ: "-fdenormal-fp-math=positive-zero,positive-zero"
// CHECK-NO-UNSAFE-NOT: "-fdenormal-fp-math=ieee"
-// CHECK-INVALID: error: invalid value 'foo' in '-fdenormal-fp-math=foo'
+// CHECK-INVALID0: error: invalid value 'foo' in '-fdenormal-fp-math=foo'
+// CHECK-INVALID1: error: invalid value 'ieee,foo' in '-fdenormal-fp-math=ieee,foo'
+// CHECK-INVALID2: error: invalid value 'foo,ieee' in '-fdenormal-fp-math=foo,ieee'
+// CHECK-INVALID3: error: invalid value 'foo,foo' in '-fdenormal-fp-math=foo,foo'