; Verifies the PTX instructions selected for sqrt and 1.0/sqrt on NVPTX when
; llc is run with -nvptx-prec-divf32 and -nvptx-prec-sqrtf32 both set to 0 and
; the functions carry the fast-math attribute groups defined at the end of
; this file.
;
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \
; RUN: | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"

; Generic sqrt intrinsics exercised by every test below.
declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)

; -- reciprocal sqrt --

; f32 1.0/sqrt(x) without ftz: expect the plain approximate rsqrt.
; CHECK-LABEL: test_rsqrt32
define float @test_rsqrt32(float %a) #0 {
; CHECK: rsqrt.approx.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}

; f32 1.0/sqrt(x) with the nvptx-f32ftz attribute: expect the .ftz variant.
; CHECK-LABEL: test_rsqrt_ftz
define float @test_rsqrt_ftz(float %a) #0 #1 {
; CHECK: rsqrt.approx.ftz.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}

; f64 1.0/sqrt(x) without ftz: expect the approximate f64 rsqrt.
; CHECK-LABEL: test_rsqrt64
define double @test_rsqrt64(double %a) #0 {
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}

; f64 1.0/sqrt(x) with the ftz attribute group applied.
; CHECK-LABEL: test_rsqrt64_ftz
define double @test_rsqrt64_ftz(double %a) #0 #1 {
; There's no rsqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}

; -- sqrt --

; f32 sqrt(x) without ftz: expect the plain approximate sqrt.
; CHECK-LABEL: test_sqrt32
define float @test_sqrt32(float %a) #0 {
; CHECK: sqrt.approx.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}

; f32 sqrt(x) with the nvptx-f32ftz attribute: expect the .ftz variant.
; CHECK-LABEL: test_sqrt_ftz
define float @test_sqrt_ftz(float %a) #0 #1 {
; CHECK: sqrt.approx.ftz.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}

; f64 sqrt(x) without ftz: expect an rsqrt followed by a reciprocal.
; CHECK-LABEL: test_sqrt64
define double @test_sqrt64(double %a) #0 {
; There's no sqrt.approx.f64 instruction; we emit
; reciprocal(rsqrt.approx.f64(x)).  There's no non-ftz approximate reciprocal,
; so we just use the ftz version.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}

; f64 sqrt(x) with the ftz attribute group applied.
; CHECK-LABEL: test_sqrt64_ftz
define double @test_sqrt64_ftz(double %a) #0 #1 {
; There's no sqrt.approx.f64 instruction, with or without ftz; we emit
; reciprocal(rsqrt.approx.f64(x)), the same sequence checked for the non-ftz
; f64 sqrt.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}

; -- refined sqrt and rsqrt --
;
; The sqrt and rsqrt refinement algorithms both emit an rsqrt.approx, followed
; by some math.

; f32 1.0/sqrt(x) with refinement requested (attribute group #2): the initial
; estimate must still be rsqrt.approx.f32; the follow-up math is not checked.
; CHECK-LABEL: test_rsqrt32_refined
define float @test_rsqrt32_refined(float %a) #0 #2 {
; CHECK: rsqrt.approx.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}

; f32 sqrt(x) with refinement requested: the refined sequence starts from an
; rsqrt estimate rather than sqrt.approx.
; CHECK-LABEL: test_sqrt32_refined
define float @test_sqrt32_refined(float %a) #0 #2 {
; CHECK: rsqrt.approx.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}

; f64 1.0/sqrt(x) with refinement requested: the initial estimate must be
; rsqrt.approx.f64.
; CHECK-LABEL: test_rsqrt64_refined
define double @test_rsqrt64_refined(double %a) #0 #2 {
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}

; f64 sqrt(x) with refinement requested: the refined sequence starts from
; rsqrt.approx.f64.
; CHECK-LABEL: test_sqrt64_refined
define double @test_sqrt64_refined(double %a) #0 #2 {
; CHECK: rsqrt.approx.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}

; -- refined sqrt and rsqrt with ftz enabled --

; f32 1.0/sqrt(x), refined, with ftz: the initial estimate must use the .ftz
; form of rsqrt.approx.
; CHECK-LABEL: test_rsqrt32_refined_ftz
define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 {
; CHECK: rsqrt.approx.ftz.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}

; f32 sqrt(x), refined, with ftz: the refined sequence starts from the .ftz
; rsqrt estimate.
; CHECK-LABEL: test_sqrt32_refined_ftz
define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 {
; CHECK: rsqrt.approx.ftz.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}

; f64 1.0/sqrt(x), refined, with the ftz attribute group applied.
; CHECK-LABEL: test_rsqrt64_refined_ftz
define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 {
; There's no rsqrt.approx.ftz.f64, so we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}

; f64 sqrt(x), refined, with the ftz attribute group applied: the estimate is
; still the non-ftz rsqrt.approx.f64.
; CHECK-LABEL: test_sqrt64_refined_ftz
define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 {
; CHECK: rsqrt.approx.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}

; #0: applied to every test; allows the approximate lowerings checked above.
attributes #0 = { "unsafe-fp-math" = "true" }
; #1: applied to the *_ftz tests; selects the .ftz forms of the f32 instructions.
attributes #1 = { "nvptx-f32ftz" = "true" }
; #2: applied to the *_refined tests; requests an estimate with a refinement
; step count of 1 for each of f32/f64 sqrt and rsqrt.
attributes #2 = { "reciprocal-estimates" = "rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" }