AMDGPU: Combine fp16/fp64 subtarget features
The same control register controls both, and are set to
the same defaults. Keep the old names around as aliases.
llvm-svn: 292837
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 46cd112..0c5f9e7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -206,12 +206,6 @@
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
-def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
- "FP16Denormals",
- "true",
- "Enable half precision denormal handling"
->;
-
// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
@@ -221,13 +215,30 @@
"Enable single precision denormal handling"
>;
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
- "FP64Denormals",
+// Denormal handling for fp64 and fp16 is controlled by the same
+// config register when fp16 supported.
+// TODO: Do we need a separate f16 setting when not legal?
+def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
+ "FP64FP16Denormals",
"true",
- "Enable double precision denormal handling",
+ "Enable double and half precision denormal handling",
[FeatureFP64]
>;
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+ "FP64FP16Denormals",
+ "true",
+ "Enable double and half precision denormal handling",
+ [FeatureFP64, FeatureFP64FP16Denormals]
+>;
+
+def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
+ "FP64FP16Denormals",
+ "true",
+ "Enable half precision denormal handling",
+ [FeatureFP64FP16Denormals]
+>;
+
def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
"FPExceptions",
"true",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 4e8529b..4848b3b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -41,9 +41,10 @@
// for SI has the unhelpful behavior that it unsets everything else if you
// disable it.
- SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
+ SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-for-global,+unaligned-buffer-access,";
+
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
@@ -52,9 +53,8 @@
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- FP16Denormals = false;
+ FP64FP16Denormals = false;
FP32Denormals = false;
- FP64Denormals = false;
}
// Set defaults if needed.
@@ -78,9 +78,8 @@
FastFMAF32(false),
HalfRate64Ops(false),
- FP16Denormals(false),
FP32Denormals(false),
- FP64Denormals(false),
+ FP64FP16Denormals(false),
FPExceptions(false),
FlatForGlobal(false),
UnalignedScratchAccess(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 26c4c34..c8414f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -81,9 +81,8 @@
bool HalfRate64Ops;
// Dynamially set bits that enable features.
- bool FP16Denormals;
bool FP32Denormals;
- bool FP64Denormals;
+ bool FP64FP16Denormals;
bool FPExceptions;
bool FlatForGlobal;
bool UnalignedScratchAccess;
@@ -282,7 +281,7 @@
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
bool hasFP16Denormals() const {
- return FP16Denormals;
+ return FP64FP16Denormals;
}
bool hasFP32Denormals() const {
@@ -290,7 +289,7 @@
}
bool hasFP64Denormals() const {
- return FP64Denormals;
+ return FP64FP16Denormals;
}
bool hasFPExceptions() const {