AMDGPU: Change fdiv lowering based on !fpmath metadata. If 2.5 ulp of error is acceptable, denormals are not required, and the division isn't a reciprocal (which is already handled separately), replace it with a faster fdiv. Simplify the lowering tests by using per-function subtarget features. llvm-svn: 276051

commit: a1fe17c9adb2b6093f1ce848a48fb8954c27c595 [log] [tgz]
author: Matt Arsenault <Matthew.Arsenault@amd.com> Tue Jul 19 23:16:53 2016 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> Tue Jul 19 23:16:53 2016 +0000
tree: 2fcb8b6fcd3f50a1c72634d2808ad3fdc7206d90
parent: 1986030b62601d8cd6d74cfc083e4638be3d8b46 [diff] [blame]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll
new file mode 100644
index 0000000..54d7848
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll

@@ -0,0 +1,18 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+
+declare float @llvm.amdgcn.fdiv.fast(float, float) #0
+
+; CHECK-LABEL: {{^}}test_fdiv_fast:
+; CHECK: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
+; CHECK: v_mul_f32_e32
+; CHECK: v_rcp_f32_e32
+; CHECK: v_mul_f32_e32
+; CHECK: v_mul_f32_e32
+define void @test_fdiv_fast(float addrspace(1)* %out, float %a, float %b) #1 { ; stores fdiv.fast(%a, %b) to %out
+  %fdiv = call float @llvm.amdgcn.fdiv.fast(float %a, float %b) ; the intrinsic under test
+  store float %fdiv, float addrspace(1)* %out ; write the result through the addrspace(1) pointer
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
commit	a1fe17c9adb2b6093f1ce848a48fb8954c27c595	[log] [tgz]
author	Matt Arsenault <Matthew.Arsenault@amd.com>	Tue Jul 19 23:16:53 2016 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	Tue Jul 19 23:16:53 2016 +0000
tree	2fcb8b6fcd3f50a1c72634d2808ad3fdc7206d90
parent	1986030b62601d8cd6d74cfc083e4638be3d8b46 [diff] [blame]