; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s

; Square-root intrinsics exercised by the sqrt/rsqrt tests in this file.
declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)

; f32 sqrt followed by fdiv with no function attributes: expects the .rn
; forms of both instructions.
; CHECK-LABEL: sqrt_div(
; CHECK: sqrt.rn.f32
; CHECK: div.rn.f32
define float @sqrt_div(float %a, float %b) {
  %t1 = tail call float @llvm.sqrt.f32(float %a)
  %t2 = fdiv float %t1, %b
  ret float %t2
}

; Same sequence with attribute group #0 (unsafe-fp-math): expects the
; .approx forms of sqrt and div.
; CHECK-LABEL: sqrt_div_fast(
; CHECK: sqrt.approx.f32
; CHECK: div.approx.f32
define float @sqrt_div_fast(float %a, float %b) #0 {
  %t1 = tail call float @llvm.sqrt.f32(float %a)
  %t2 = fdiv float %t1, %b
  ret float %t2
}

; Same sequence with attribute group #1 (nvptx-f32ftz): expects the
; .rn.ftz forms of sqrt and div.
; CHECK-LABEL: sqrt_div_ftz(
; CHECK: sqrt.rn.ftz.f32
; CHECK: div.rn.ftz.f32
define float @sqrt_div_ftz(float %a, float %b) #1 {
  %t1 = tail call float @llvm.sqrt.f32(float %a)
  %t2 = fdiv float %t1, %b
  ret float %t2
}

; Same sequence with both #0 (unsafe-fp-math) and #1 (nvptx-f32ftz):
; expects the .approx.ftz forms of sqrt and div.
; CHECK-LABEL: sqrt_div_fast_ftz(
; CHECK: sqrt.approx.ftz.f32
; CHECK: div.approx.ftz.f32
define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
  %t1 = tail call float @llvm.sqrt.f32(float %a)
  %t2 = fdiv float %t1, %b
  ret float %t2
}

; There are no fast-math or ftz versions of sqrt and div for f64.  Instead,
; sqrt(x) is computed as reciprocal(rsqrt(x)), and the divide is emitted as a
; vanilla div.rn.f64.

; f64 sqrt followed by fdiv with both #0 and #1: expects rsqrt.approx.f64,
; then rcp.approx.ftz.f64, then an ordinary div.rn.f64.
; CHECK-LABEL: sqrt_div_fast_ftz_f64(
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
; CHECK: div.rn.f64
define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
  %t1 = tail call double @llvm.sqrt.f64(double %a)
  %t2 = fdiv double %t1, %b
  ret double %t2
}

; 1.0 / sqrt(x) with no function attributes: rsqrt.approx must not appear
; anywhere in the function; a sqrt.rn.f32 is expected instead.
; CHECK-LABEL: rsqrt(
; CHECK-NOT: rsqrt.approx
; CHECK: sqrt.rn.f32
; CHECK-NOT: rsqrt.approx
define float @rsqrt(float %a) {
  %b = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %b
  ret float %ret
}

; 1.0 / sqrt(x) with #0 (unsafe-fp-math): expects rsqrt.approx.f32 with no
; "div." or "sqrt." instruction before or after it.
; CHECK-LABEL: rsqrt_fast(
; CHECK-NOT: div.
; CHECK-NOT: sqrt.
; CHECK: rsqrt.approx.f32
; CHECK-NOT: div.
; CHECK-NOT: sqrt.
define float @rsqrt_fast(float %a) #0 {
  %b = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %b
  ret float %ret
}

; 1.0 / sqrt(x) with #0 and #1: expects rsqrt.approx.ftz.f32 with no "div."
; or "sqrt." instruction before or after it.
; CHECK-LABEL: rsqrt_fast_ftz(
; CHECK-NOT: div.
; CHECK-NOT: sqrt.
; CHECK: rsqrt.approx.ftz.f32
; CHECK-NOT: div.
; CHECK-NOT: sqrt.
define float @rsqrt_fast_ftz(float %a) #0 #1 {
  %b = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %b
  ret float %ret
}

; Plain f32 add with no function attributes: expects add.rn.f32.
; The label includes "(" like the labels earlier in this file, so it cannot
; also match the fadd_ftz function.
; CHECK-LABEL: fadd(
; CHECK: add.rn.f32
define float @fadd(float %a, float %b) {
  %t1 = fadd float %a, %b
  ret float %t1
}

; f32 add with #1 (nvptx-f32ftz): expects add.rn.ftz.f32.
; CHECK-LABEL: fadd_ftz(
; CHECK: add.rn.ftz.f32
define float @fadd_ftz(float %a, float %b) #1 {
  %t1 = fadd float %a, %b
  ret float %t1
}

; Trigonometric intrinsics exercised by fsin_approx and fcos_approx.
declare float @llvm.sin.f32(float)
declare float @llvm.cos.f32(float)

; llvm.sin.f32 with #0 (unsafe-fp-math): expects sin.approx.f32.
; CHECK-LABEL: fsin_approx(
; CHECK: sin.approx.f32
define float @fsin_approx(float %a) #0 {
  %r = tail call float @llvm.sin.f32(float %a)
  ret float %r
}

; llvm.cos.f32 with #0 (unsafe-fp-math): expects cos.approx.f32.
; CHECK-LABEL: fcos_approx(
; CHECK: cos.approx.f32
define float @fcos_approx(float %a) #0 {
  %r = tail call float @llvm.cos.f32(float %a)
  ret float %r
}

; Two "fdiv arcp" operations sharing one divisor, no function attributes:
; expects rcp.rn.f32 followed by two mul.rn.f32.
; The label includes "(" so it cannot match the _ftz variant below.
; CHECK-LABEL: repeated_div_recip_allowed(
define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) {
; CHECK: rcp.rn.f32
; CHECK: mul.rn.f32
; CHECK: mul.rn.f32
  %x = fdiv arcp float %a, %divisor
  %y = fdiv arcp float %b, %divisor
  %z = select i1 %pred, float %x, float %y
  ret float %z
}

; Two "fdiv arcp" operations sharing one divisor, with #1 (nvptx-f32ftz):
; expects rcp.rn.ftz.f32 followed by two mul.rn.ftz.f32.
; CHECK-LABEL: repeated_div_recip_allowed_ftz(
define float @repeated_div_recip_allowed_ftz(i1 %pred, float %a, float %b, float %divisor) #1 {
; CHECK: rcp.rn.ftz.f32
; CHECK: mul.rn.ftz.f32
; CHECK: mul.rn.ftz.f32
  %x = fdiv arcp float %a, %divisor
  %y = fdiv arcp float %b, %divisor
  %z = select i1 %pred, float %x, float %y
  ret float %z
}

; Two plain fdiv operations sharing one divisor, with #0 (unsafe-fp-math):
; expects rcp.approx.f32 followed by two mul.f32.
; The label includes "(" so it cannot match the _ftz variant below.
; CHECK-LABEL: repeated_div_fast(
define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
; CHECK: rcp.approx.f32
; CHECK: mul.f32
; CHECK: mul.f32
  %x = fdiv float %a, %divisor
  %y = fdiv float %b, %divisor
  %z = select i1 %pred, float %x, float %y
  ret float %z
}

; Two plain fdiv operations sharing one divisor, with #0 and #1:
; expects rcp.approx.ftz.f32 followed by two mul.ftz.f32.
; CHECK-LABEL: repeated_div_fast_ftz(
define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
; CHECK: rcp.approx.ftz.f32
; CHECK: mul.ftz.f32
; CHECK: mul.ftz.f32
  %x = fdiv float %a, %divisor
  %y = fdiv float %b, %divisor
  %z = select i1 %pred, float %x, float %y
  ret float %z
}

; Attribute groups referenced by the tests in this file:
;   #0 enables unsafe-fp-math (used by the "fast"/"approx" tests);
;   #1 enables nvptx-f32ftz (used by the "ftz" tests).
attributes #0 = { "unsafe-fp-math" = "true" }
attributes #1 = { "nvptx-f32ftz" = "true" }