; blob: 0c7160df2b96d04caa14dba31fb1d03c296c1e9e
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s
; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s
; Make sure this doesn't crash with no triple

; Exercised through the NOOP prefix (the RUN line without -mtriple): the
; 2.5ulp fdiv is expected to come out of the pass untouched.
; NOTE(review): attribute group #3 has no definition among the attribute
; groups declared in this file (#0, #1, #2) -- confirm which group was meant.
; NOOP-LABEL: @noop_fdiv_fpmath(
; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
define amdgpu_kernel void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 {
  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out
  ret void
}

; Scalar f32 fdiv with attribute group #1. Expected result per the checks
; below: fdivs whose !fpmath accuracy is 2.5ulp or 3ulp and that carry no
; fast-math flags become calls to @llvm.amdgcn.fdiv.fast; fdivs with no
; metadata, with 0.5ulp / 1ulp metadata, or with fast / arcp flags stay fdivs.
; CHECK-LABEL: @fdiv_fpmath(
; CHECK: %no.md = fdiv float %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3
; CHECK: %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
; CHECK: %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
define amdgpu_kernel void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
  %no.md = fdiv float %a, %b
  store volatile float %no.md, float addrspace(1)* %out

  %md.half.ulp = fdiv float %a, %b, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  %md.1ulp = fdiv float %a, %b, !fpmath !2
  store volatile float %md.1ulp, float addrspace(1)* %out

  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  %md.3ulp = fdiv float %a, %b, !fpmath !3
  store volatile float %md.3ulp, float addrspace(1)* %out

  %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
  store volatile float %fast.md.25ulp, float addrspace(1)* %out

  %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
  store volatile float %arcp.md.25ulp, float addrspace(1)* %out

  ret void
}

; Scalar reciprocal form (constant 1.0 numerator) with attribute group #1.
; Every variant below, including the 2.5ulp one, is expected to remain an
; fdiv with its flags and metadata unchanged.
; CHECK-LABEL: @rcp_fdiv_fpmath(
; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}}
; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0
; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1
; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0
; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}}
; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0
define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
  %no.md = fdiv float 1.0, %x
  store volatile float %no.md, float addrspace(1)* %out

  %md.25ulp = fdiv float 1.0, %x, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  %md.half.ulp = fdiv float 1.0, %x, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  %arcp.no.md = fdiv arcp float 1.0, %x
  store volatile float %arcp.no.md, float addrspace(1)* %out

  %arcp.25ulp = fdiv arcp float 1.0, %x, !fpmath !0
  store volatile float %arcp.25ulp, float addrspace(1)* %out

  %fast.no.md = fdiv fast float 1.0, %x
  store volatile float %fast.no.md, float addrspace(1)* %out

  %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0
  store volatile float %fast.25ulp, float addrspace(1)* %out

  ret void
}

; <2 x float> fdiv with attribute group #1. The no-metadata, 0.5ulp and 1ulp
; vector fdivs are expected to stay intact. The 2.5ulp vector fdiv is expected
; to be split per element: extract both operands, call @llvm.amdgcn.fdiv.fast
; on the scalars, and rebuild the vector with insertelement (the final insert
; keeps the original %md.25ulp name).
; CHECK-LABEL: @fdiv_fpmath_vector(
; CHECK: %no.md = fdiv <2 x float> %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
; CHECK: %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2

; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]]), !fpmath !0
; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0
; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1
define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
  %no.md = fdiv <2 x float> %a, %b
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
  store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

  %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
  store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out

  %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
  store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; <2 x float> reciprocal form with a splat 1.0 numerator and attribute group
; #1. All variants below are expected to remain vector fdivs with their flags
; and metadata unchanged.
; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1
; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
; CHECK: %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
  %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  %md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1
  store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

  %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out

  %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

  %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
  store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

  %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; <2 x float> fdiv with a non-splat constant numerator <1.0, 2.0> and
; attribute group #1. All variants below are expected to remain vector fdivs.
; The checks for the fdivs without metadata are anchored with {{$}} so that
; any unexpected trailing metadata on those lines is caught.
; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
; CHECK: %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
  %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out

  %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

  %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
  store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

  %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; Numerator is %x with element 0 overwritten by constant 1.0, so only one lane
; has a constant numerator. Both the arcp and fast 2.5ulp vector fdivs are
; currently expected to be left as-is.
; FIXME: Should be able to get fdiv for 1.0 component
; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant(
; CHECK: %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
; CHECK: store volatile <2 x float> %arcp.25ulp

; CHECK: %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
  %x.insert = insertelement <2 x float> %x, float 1.0, i32 0

  %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
  store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

  %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; Same instruction mix as @fdiv_fpmath, but with attribute group #2, which
; sets "target-features"="+fp32-denormals". Here every fdiv, including the
; 2.5ulp and 3ulp ones, is expected to remain an fdiv.
; CHECK-LABEL: @fdiv_fpmath_f32_denormals(
; CHECK: %no.md = fdiv float %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0
; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3
; CHECK: %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
; CHECK: %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
  %no.md = fdiv float %a, %b
  store volatile float %no.md, float addrspace(1)* %out

  %md.half.ulp = fdiv float %a, %b, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  %md.1ulp = fdiv float %a, %b, !fpmath !2
  store volatile float %md.1ulp, float addrspace(1)* %out

  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  %md.3ulp = fdiv float %a, %b, !fpmath !3
  store volatile float %md.3ulp, float addrspace(1)* %out

  %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
  store volatile float %fast.md.25ulp, float addrspace(1)* %out

  %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
  store volatile float %arcp.md.25ulp, float addrspace(1)* %out

  ret void
}

; Function attribute groups referenced by the kernels in this file.
attributes #0 = { nounwind optnone noinline }
attributes #1 = { nounwind }
attributes #2 = { nounwind "target-features"="+fp32-denormals" }

; The !fpmath nodes must be printed with the same numbering and values.
; CHECK: !0 = !{float 2.500000e+00}
; CHECK: !1 = !{float 5.000000e-01}
; CHECK: !2 = !{float 1.000000e+00}
; CHECK: !3 = !{float 3.000000e+00}

; !fpmath operands: 2.5, 0.5, 1.0 and 3.0 (the accuracy values the value names
; in this file refer to as 25ulp, half.ulp, 1ulp and 3ulp).
!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
!2 = !{float 1.000000e+00}
!3 = !{float 3.000000e+00}