AMDGPU: Combine fp16/fp64 subtarget features. The same control register controls both, and they are set to the same defaults. Keep the old names around as aliases. llvm-svn: 292837

commit: a6867fd441a14d9aa9a1a04085a7d0d5ec34d6eb [log] [tgz]
author: Matt Arsenault <Matthew.Arsenault@amd.com> Mon Jan 23 22:31:03 2017 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> Mon Jan 23 22:31:03 2017 +0000
tree: 108d7d49ad789acdc34b4cbb1072583140208240
parent: f86d385813f9cd36bef50eeae7c58e7539a81e4d [diff]
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 46cd112..0c5f9e7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td

@@ -206,12 +206,6 @@
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//
 
-def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
-  "FP16Denormals",
-  "true",
-  "Enable half precision denormal handling"
->;
-
 // Some instructions do not support denormals despite this flag. Using
 // fp32 denormals also causes instructions to run at the double
 // precision rate for the device.
@@ -221,13 +215,30 @@
   "Enable single precision denormal handling"
 >;
 
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
-  "FP64Denormals",
+// Denormal handling for fp64 and fp16 is controlled by the same
+// config register when fp16 is supported.
+// TODO: Do we need a separate f16 setting when not legal?
+def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
+  "FP64FP16Denormals",
   "true",
-  "Enable double precision denormal handling",
+  "Enable double and half precision denormal handling",
   [FeatureFP64]
 >;
 
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+  "FP64FP16Denormals",
+  "true",
+  "Enable double and half precision denormal handling",
+  [FeatureFP64, FeatureFP64FP16Denormals]
+>;
+
+def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
+  "FP64FP16Denormals",
+  "true",
+  "Enable half precision denormal handling",
+  [FeatureFP64FP16Denormals]
+>;
+
 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
   "FPExceptions",
   "true",

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 4e8529b..4848b3b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

@@ -41,9 +41,10 @@
   // for SI has the unhelpful behavior that it unsets everything else if you
   // disable it.
 
-  SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
+  SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,");
   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
     FullFS += "+flat-for-global,+unaligned-buffer-access,";
+
   FullFS += FS;
 
   ParseSubtargetFeatures(GPU, FullFS);
@@ -52,9 +53,8 @@
   // denormals, but should be checked. Should we issue a warning somewhere
   // if someone tries to enable these?
   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
-    FP16Denormals = false;
+    FP64FP16Denormals = false;
     FP32Denormals = false;
-    FP64Denormals = false;
   }
 
   // Set defaults if needed.
@@ -78,9 +78,8 @@
     FastFMAF32(false),
     HalfRate64Ops(false),
 
-    FP16Denormals(false),
     FP32Denormals(false),
-    FP64Denormals(false),
+    FP64FP16Denormals(false),
     FPExceptions(false),
     FlatForGlobal(false),
     UnalignedScratchAccess(false),

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 26c4c34..c8414f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

@@ -81,9 +81,8 @@
   bool HalfRate64Ops;
 
   // Dynamially set bits that enable features.
-  bool FP16Denormals;
   bool FP32Denormals;
-  bool FP64Denormals;
+  bool FP64FP16Denormals;
   bool FPExceptions;
   bool FlatForGlobal;
   bool UnalignedScratchAccess;
@@ -282,7 +281,7 @@
   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
 
   bool hasFP16Denormals() const {
-    return FP16Denormals;
+    return FP64FP16Denormals;
   }
 
   bool hasFP32Denormals() const {
@@ -290,7 +289,7 @@
   }
 
   bool hasFP64Denormals() const {
-    return FP64Denormals;
+    return FP64FP16Denormals;
   }
 
   bool hasFPExceptions() const {
commit	a6867fd441a14d9aa9a1a04085a7d0d5ec34d6eb	[log] [tgz]
author	Matt Arsenault <Matthew.Arsenault@amd.com>	Mon Jan 23 22:31:03 2017 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	Mon Jan 23 22:31:03 2017 +0000
tree	108d7d49ad789acdc34b4cbb1072583140208240
parent	f86d385813f9cd36bef50eeae7c58e7539a81e4d [diff]