radv: enable FP16/FP64 denormals earlier and only for LLVM

ACO sets this itself and will have to set it differently in the future to support shaderDenormFlushToZeroFloat64.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>

commit: d7b0d9a8d8a5a7f3b26a30c8e7005c2fb3f2f4af [log] [tgz]
author: Rhys Perry <pendingchaos02@gmail.com> Mon Nov 11 13:41:32 2019 +0000
committer: Rhys Perry <pendingchaos02@gmail.com> Fri Nov 15 17:36:21 2019 +0000
tree: bae80057af26182f0f64403eeae34b58c46c33cc
parent: c6c76527535762642d336547c5165de5e440c552 [diff] [blame]
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index c2562c0..1e55052 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c

@@ -700,20 +700,6 @@
 	config_out->num_sgprs = num_sgprs;
 	config_out->num_shared_vgprs = num_shared_vgprs;
 
-	/* Enable 64-bit and 16-bit denormals, because there is no performance
-	 * cost.
-	 *
-	 * If denormals are enabled, all floating-point output modifiers are
-	 * ignored.
-	 *
-	 * Don't enable denormals for 32-bit floats, because:
-	 * - Floating-point output modifiers would be ignored by the hw.
-	 * - Some opcodes don't support denormals, such as v_mad_f32. We would
-	 *   have to stop using those.
-	 * - GFX6 & GFX7 would be very slow.
-	 */
-	config_out->float_mode |= V_00B028_FP_64_DENORMS;
-
 	config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
 			    S_00B12C_SCRATCH_EN(scratch_enabled);
 
@@ -971,6 +957,20 @@
 			return NULL;
 		}
 
+		/* Enable 64-bit and 16-bit denormals, because there is no performance
+		 * cost.
+		 *
+		 * If denormals are enabled, all floating-point output modifiers are
+		 * ignored.
+		 *
+		 * Don't enable denormals for 32-bit floats, because:
+		 * - Floating-point output modifiers would be ignored by the hw.
+		 * - Some opcodes don't support denormals, such as v_mad_f32. We would
+		 *   have to stop using those.
+		 * - GFX6 & GFX7 would be very slow.
+		 */
+		config.float_mode |= V_00B028_FP_64_DENORMS;
+
 		if (rtld_binary.lds_size > 0) {
 			unsigned alloc_granularity = device->physical_device->rad_info.chip_class >= GFX7 ? 512 : 256;
 			config.lds_size = align(rtld_binary.lds_size, alloc_granularity) / alloc_granularity;
commit	d7b0d9a8d8a5a7f3b26a30c8e7005c2fb3f2f4af	[log] [tgz]
author	Rhys Perry <pendingchaos02@gmail.com>	Mon Nov 11 13:41:32 2019 +0000
committer	Rhys Perry <pendingchaos02@gmail.com>	Fri Nov 15 17:36:21 2019 +0000
tree	bae80057af26182f0f64403eeae34b58c46c33cc
parent	c6c76527535762642d336547c5165de5e440c552 [diff] [blame]