[NVPTX] Compute approx sqrt as 1/rsqrt(x) rather than x*rsqrt(x). x*rsqrt(x) returns NaN for x == 0 (rsqrt(0) is +inf, and 0 * inf is NaN), whereas 1/rsqrt(x) returns 0 (1/inf), as desired. Verified that the particular nvptx approximate instructions used here do in fact return 0 for x = 0. llvm-svn: 293713

commit: 06fcea4cd92ecedb8ddcebbe80650d2e92baf2db [log] [tgz]
author: Justin Lebar <jlebar@google.com> Tue Jan 31 23:08:57 2017 +0000
committer: Justin Lebar <jlebar@google.com> Tue Jan 31 23:08:57 2017 +0000
tree: fa8e53f46d0dd2880f4a78ab50464f673d197007
parent: d9953d9dd289b87c17ed6b60d14f2db62b1f9dc4 [diff] [blame]
diff --git a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
index 5edf9e2..1e28db4 100644
--- a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
+++ b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll

@@ -59,9 +59,11 @@
 
 ; CHECK-LABEL test_sqrt64
 define double @test_sqrt64(double %a) #0 {
-; There's no sqrt.approx.f64 instruction; we emit x * rsqrt.approx.f64(x).
+; There's no sqrt.approx.f64 instruction; we emit
+; reciprocal(rsqrt.approx.f64(x)).  There's no non-ftz approximate reciprocal,
+; so we just use the ftz version.
 ; CHECK: rsqrt.approx.f64
-; CHECK: mul.f64
+; CHECK: rcp.approx.ftz.f64
   %ret = tail call double @llvm.sqrt.f64(double %a)
   ret double %ret
 }
@@ -70,7 +72,7 @@
 define double @test_sqrt64_ftz(double %a) #0 #1 {
 ; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
 ; CHECK: rsqrt.approx.f64
-; CHECK: mul.f64
+; CHECK: rcp.approx.ftz.f64
   %ret = tail call double @llvm.sqrt.f64(double %a)
   ret double %ret
 }
commit	06fcea4cd92ecedb8ddcebbe80650d2e92baf2db	[log] [tgz]
author	Justin Lebar <jlebar@google.com>	Tue Jan 31 23:08:57 2017 +0000
committer	Justin Lebar <jlebar@google.com>	Tue Jan 31 23:08:57 2017 +0000
tree	fa8e53f46d0dd2880f4a78ab50464f673d197007
parent	d9953d9dd289b87c17ed6b60d14f2db62b1f9dc4 [diff] [blame]