; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-PRELINK %s
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-use-native -amdgpu-prelink <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-NATIVE %s
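; Tests for the AMDGPU library-call simplifier. The three RUN lines cover the
; post-link, pre-link (-amdgpu-prelink) and native (-amdgpu-use-native) modes
; of -amdgpu-simplify-libcall; the GCN-POSTLINK, GCN-PRELINK and GCN-NATIVE
; prefixes carry the expectations for each mode.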

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
; GCN-POSTLINK: tail call fast float @_Z3sinf(
; GCN-POSTLINK: tail call fast float @_Z3cosf(
; GCN-PRELINK: call fast float @_Z6sincosfPf(
; GCN-NATIVE: tail call fast float @_Z10native_sinf(
; GCN-NATIVE: tail call fast float @_Z10native_cosf(
define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3sinf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  %call2 = tail call fast float @_Z3cosf(float %tmp)
  %arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  store float %call2, float addrspace(1)* %arrayidx3, align 4
  ret void
}

declare float @_Z3sinf(float)

declare float @_Z3cosf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2
; GCN-POSTLINK: tail call fast <2 x float> @_Z3sinDv2_f(
; GCN-POSTLINK: tail call fast <2 x float> @_Z3cosDv2_f(
; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_(
; GCN-NATIVE: tail call fast <2 x float> @_Z10native_sinDv2_f(
; GCN-NATIVE: tail call fast <2 x float> @_Z10native_cosDv2_f(
define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8
  %call = tail call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp)
  store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8
  %call2 = tail call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1
  store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8
  ret void
}

declare <2 x float> @_Z3sinDv2_f(<2 x float>)

declare <2 x float> @_Z3cosDv2_f(<2 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3
; GCN-POSTLINK: tail call fast <3 x float> @_Z3sinDv3_f(
; GCN-POSTLINK: tail call fast <3 x float> @_Z3cosDv3_f(
; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_(
; GCN-NATIVE: tail call fast <3 x float> @_Z10native_sinDv3_f(
; GCN-NATIVE: tail call fast <3 x float> @_Z10native_cosDv3_f(
define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) {
entry:
  %castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)*
  %loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16
  %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %call = tail call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
  %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16
  %call11 = tail call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
  %arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1
  %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)*
  store <4 x float> %extractVec13, <4 x float> addrspace(1)* %storetmp14, align 16
  ret void
}

declare <3 x float> @_Z3sinDv3_f(<3 x float>)

declare <3 x float> @_Z3cosDv3_f(<3 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4
; GCN-POSTLINK: tail call fast <4 x float> @_Z3sinDv4_f(
; GCN-POSTLINK: tail call fast <4 x float> @_Z3cosDv4_f(
; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_(
; GCN-NATIVE: tail call fast <4 x float> @_Z10native_sinDv4_f(
; GCN-NATIVE: tail call fast <4 x float> @_Z10native_cosDv4_f(
define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
  %call = tail call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp)
  store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16
  %call2 = tail call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1
  store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16
  ret void
}

declare <4 x float> @_Z3sinDv4_f(<4 x float>)

declare <4 x float> @_Z3cosDv4_f(<4 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8
; GCN-POSTLINK: tail call fast <8 x float> @_Z3sinDv8_f(
; GCN-POSTLINK: tail call fast <8 x float> @_Z3cosDv8_f(
; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_(
; GCN-NATIVE: tail call fast <8 x float> @_Z10native_sinDv8_f(
; GCN-NATIVE: tail call fast <8 x float> @_Z10native_cosDv8_f(
define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32
  %call = tail call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp)
  store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32
  %call2 = tail call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1
  store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32
  ret void
}

declare <8 x float> @_Z3sinDv8_f(<8 x float>)

declare <8 x float> @_Z3cosDv8_f(<8 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16
; GCN-POSTLINK: tail call fast <16 x float> @_Z3sinDv16_f(
; GCN-POSTLINK: tail call fast <16 x float> @_Z3cosDv16_f(
; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_(
; GCN-NATIVE: tail call fast <16 x float> @_Z10native_sinDv16_f(
; GCN-NATIVE: tail call fast <16 x float> @_Z10native_cosDv16_f(
define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64
  %call = tail call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp)
  store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64
  %call2 = tail call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1
  store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64
  ret void
}

declare <16 x float> @_Z3sinDv16_f(<16 x float>)

declare <16 x float> @_Z3cosDv16_f(<16 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_recip
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) {
entry:
  %call = tail call fast float @_Z12native_recipf(float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z12native_recipf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_recip
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) {
entry:
  %call = tail call fast float @_Z10half_recipf(float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z10half_recipf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide
; GCN: fmul fast float %tmp, 0x3FD5555560000000
define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z13native_divideff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide
; GCN: fmul fast float %tmp, 0x3FD5555560000000
define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z11half_divideff(float, float)

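; pow/pown/powr with special constant exponents: an exponent of 0 folds to
; 1.0, 1 to a copy of x, 2 to x*x and -1 to 1.0/x; +/-0.5 become sqrt/rsqrt
; in prelink mode, and a small integer exponent (11) is expanded into a
; chain of multiplies.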
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0f
; GCN: store float 1.000000e+00, float addrspace(1)* %a
define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3powff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0i
; GCN: store float 1.000000e+00, float addrspace(1)* %a
define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1f
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_pow_1f(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1i
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_pow_1i(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2f
; GCN: %tmp = load float, float addrspace(1)* %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2f(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2i
; GCN: %tmp = load float, float addrspace(1)* %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1f
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1f(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1i
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1i(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
; GCN-PRELINK: %__pow2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
; GCN-PRELINK: %__pow2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.100000e+01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z4powrff(float %tmp, float 1.100000e+01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4powrff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z4pownfi(float %tmp, i32 11)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4pownfi(float, i32)

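; For a general exponent the prelink expansion computes pow(x, y) as
; exp2(y * log2(|x|)) and then ORs the sign bit of x back onto the result;
; powr omits the fabs/sign handling, and pown derives the sign from the
; parity of its integer exponent.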
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
; GCN-POSTLINK: tail call fast float @_Z4powrff(float %tmp, float %tmp1)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
; GCN-POSTLINK: tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
; GCN-PRELINK: %conv = fptosi float %tmp1 to i32
; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %__yeven = shl i32 %conv, 31
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %conv = fptosi float %tmp1 to i32
  %call = tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_rootn_1(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5rootnfi(float, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 2)
; GCN-PRELINK: %__rootn2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 2)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 3)
; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp)
define amdgpu_kernel void @test_rootn_3(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 3)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m1
; GCN: fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
; GCN-PRELINK: %__rootn2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

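; fma/mad folds: a zero multiplicand leaves just the addend, a unit
; multiplicand turns the call into an fadd, and a zero addend turns it into
; an fmul.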
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_0x
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3fmafff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x0
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_0x
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3madfff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_x0
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x1y
; GCN: %fmaadd = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_1xy
; GCN: %fmaadd = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_xy0
; GCN: %fmamul = fmul fast float %tmp1, %tmp
define amdgpu_kernel void @test_fma_xy0(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %tmp1 = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

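; With -amdgpu-use-native the remaining f32 math builtins are rewritten to
; their native_* counterparts; the f64 sqrt case below checks that the
; native substitution is not applied to double precision.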
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp
; GCN-NATIVE: tail call fast float @_Z10native_expf(float %tmp)
define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3expf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3expf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2
; GCN-NATIVE: tail call fast float @_Z11native_exp2f(float %tmp)
define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4exp2f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4exp2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10
; GCN-NATIVE: tail call fast float @_Z12native_exp10f(float %tmp)
define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5exp10f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5exp10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log
; GCN-NATIVE: tail call fast float @_Z10native_logf(float %tmp)
define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3logf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3logf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2
; GCN-NATIVE: tail call fast float @_Z11native_log2f(float %tmp)
define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4log2f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4log2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10
; GCN-NATIVE: tail call fast float @_Z12native_log10f(float %tmp)
define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5log10f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5log10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
; GCN-NATIVE: tail call fast float @_Z11native_sqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4sqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
; GCN: tail call fast double @_Z4sqrtd(double %tmp)
define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) {
entry:
  %tmp = load double, double addrspace(1)* %a, align 8
  %call = tail call fast double @_Z4sqrtd(double %tmp)
  store double %call, double addrspace(1)* %a, align 8
  ret void
}

declare float @_Z4sqrtf(float)
declare double @_Z4sqrtd(double)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt
; GCN-NATIVE: tail call fast float @_Z12native_rsqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rsqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5rsqrtf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan
; GCN-NATIVE: tail call fast float @_Z10native_tanf(float %tmp)
define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3tanf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3tanf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos
; GCN-NATIVE: tail call float @_Z10native_sinf(float %tmp)
; GCN-NATIVE: tail call float @_Z10native_cosf(float %tmp)
define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float*
  %call = tail call fast float @_Z6sincosfPf(float %tmp, float* %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z6sincosfPf(float, float*)

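; The OpenCL pipe builtins below are rewritten to size-suffixed variants
; (__read_pipe_2_4, __read_pipe_4_4, __write_pipe_2_4, __write_pipe_4_4 and
; the __read_pipe_2_1 through __read_pipe_2_128 forms) for small packets,
; while the 400-byte struct keeps the generic __read_pipe_2 call.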
%opencl.pipe_t = type opaque
%opencl.reserve_id_t = type opaque

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[NOUNWIND:[0-9]+]]
; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[NOUNWIND]]
define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  %tmp2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4)
  %tmp4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4)
  ret void
}

declare i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32)

declare %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32)

declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32)

declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[NOUNWIND]]
; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[NOUNWIND]]
define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  %tmp2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0
  %tmp4 = tail call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0
  ret void
}

declare i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32) local_unnamed_addr

declare %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) local_unnamed_addr

declare i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32) local_unnamed_addr

declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32) local_unnamed_addr

%struct.S = type { [100 x i32] }

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size
; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[NOUNWIND]]
define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 {
entry:
  %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8*
  %tmp1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0
  %tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)*
  %tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8*
  %tmp4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0
  %tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)*
  %tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8*
  %tmp7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0
  %tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)*
  %tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8*
  %tmp10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0
  %tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)*
  %tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8*
  %tmp13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0
  %tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)*
  %tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8*
  %tmp16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0
  %tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)*
  %tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8*
  %tmp19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0
  %tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)*
  %tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8*
  %tmp22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0
  %tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)*
  %tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8*
  %tmp25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0
  ret void
}

; GCN-PRELINK: attributes #[[NOUNWIND]] = { nounwind }
attributes #0 = { nounwind }