; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s

; Stores element 1 of a <2 x i16> (bitcast from the i32 argument) through an
; addrspace(1) pointer. The gfx900 run expects a single
; global_store_short_d16_hi; the fiji run expects a 16-bit right shift followed
; by flat_store_short. Both runs then expect the wait and return.
; GCN-LABEL: {{^}}store_global_hi_v2i16:
; GCN: s_waitcnt

; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16(i16 addrspace(1)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  store i16 %hi, i16 addrspace(1)* %out
  ret void
}

; Half-precision variant: stores element 1 of a <2 x half> (bitcast from the
; i32 argument) through an addrspace(1) pointer. The expected instructions are
; the same as for the i16 case: global_store_short_d16_hi on gfx900, shift plus
; flat_store_short on fiji.
; GCN-LABEL: {{^}}store_global_hi_v2f16:
; GCN: s_waitcnt

; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2f16(half addrspace(1)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x half>
  %hi = extractelement <2 x half> %value, i32 1
  store half %hi, half addrspace(1)* %out
  ret void
}

; The high 16 bits are produced with an explicit lshr by 16 and a trunc to i16
; instead of a vector extract, then stored through an addrspace(1) pointer.
; The gfx900 run still expects global_store_short_d16_hi with no separate
; shift; the fiji run expects the shift plus flat_store_short.
; GCN-LABEL: {{^}}store_global_hi_i32_shift:
; GCN: s_waitcnt

; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_i32_shift(i16 addrspace(1)* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i16
  store i16 %hi, i16 addrspace(1)* %out
  ret void
}

; Byte store of the high element: element 1 of the <2 x i16> is truncated to i8
; and stored through an addrspace(1) pointer. The gfx900 run expects
; global_store_byte_d16_hi; the fiji run expects a 16-bit right shift followed
; by flat_store_byte.
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_byte v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_i8(i8 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  store i8 %trunc, i8 addrspace(1)* %out
  ret void
}

; Byte store where the source is formed by lshr 16 followed by a trunc directly
; to i8, stored through an addrspace(1) pointer. The gfx900 run expects
; global_store_byte_d16_hi; the fiji run expects the shift plus flat_store_byte.
; GCN-LABEL: {{^}}store_global_hi_i8_shift:
; GCN: s_waitcnt

; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_byte v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_i8_shift(i8 addrspace(1)* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i8
  store i8 %hi, i8 addrspace(1)* %out
  ret void
}

; High-half short store at i16 index 2047 (byte offset 4094) from an
; addrspace(1) pointer. The gfx900 run expects the offset to be folded into the
; store as offset:4094. The fiji run expects explicit 64-bit address arithmetic
; (add / addc) and the shift, in any order, followed by a flat_store_short with
; nothing after the data register.
; GCN-LABEL: {{^}}store_global_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094

; VI-DAG: v_add_i32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2

; VI: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_max_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 2047
  store i16 %hi, i16 addrspace(1)* %gep
  ret void
}

; High-half short store at i16 index -2048 (byte offset -4096) from an
; addrspace(1) pointer. The gfx900 run expects the negative offset to be folded
; into the store as offset:-4096. The fiji run expects add / addc address
; arithmetic and the shift, in any order, followed by a flat_store_short whose
; data operand is a single-digit VGPR at end of line.
; GCN-LABEL: {{^}}store_global_hi_v2i16_min_offset:
; GCN: s_waitcnt
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}

; VI-DAG: v_add_i32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2

; VI: flat_store_short v[0:1], v{{[0-9]$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_min_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 -2048
  store i16 %hi, i16 addrspace(1)* %gep
  ret void
}

; High-half byte store at byte offset 4095 from an addrspace(1) pointer. The
; gfx900 run expects global_store_byte_d16_hi with offset:4095. The fiji run
; expects add / addc address arithmetic and the shift, in any order, followed
; by a flat_store_byte whose data operand is a single-digit VGPR at end of line.
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_max_offset:
; GCN: s_waitcnt
; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095

; VI-DAG: v_add_i32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
; VI: flat_store_byte v[0:1], v{{[0-9]$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_i8_max_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4095
  store i8 %trunc, i8 addrspace(1)* %gep
  ret void
}

; High-half byte store at byte offset -4095 from an addrspace(1) pointer. The
; gfx900 run expects global_store_byte_d16_hi with offset:-4095. The fiji run
; expects add / addc address arithmetic and the shift, in any order, followed
; by a flat_store_byte whose data operand is a single-digit VGPR at end of line.
; NOTE(review): the test is named "min_offset" but uses -4095, whereas
; store_global_hi_v2i16_min_offset checks offset:-4096 - confirm whether -4096
; was intended here.
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_min_offset:
; GCN: s_waitcnt
; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095

; VI-DAG: v_add_i32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2

; VI: flat_store_byte v[0:1], v{{[0-9]$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_i8_min_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 -4095
  store i8 %trunc, i8 addrspace(1)* %gep
  ret void
}

; Stores element 1 of a <2 x i16> (bitcast from the i32 argument) through an
; addrspace(4) pointer. The gfx900 run expects flat_store_short_d16_hi with no
; trailing modifiers; the fiji run expects a 16-bit right shift followed by
; flat_store_short.
; GCN-LABEL: {{^}}store_flat_hi_v2i16:
; GCN: s_waitcnt

; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16(i16 addrspace(4)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  store i16 %hi, i16 addrspace(4)* %out
  ret void
}

; Half-precision variant: stores element 1 of a <2 x half> (bitcast from the
; i32 argument) through an addrspace(4) pointer. The gfx900 run expects
; flat_store_short_d16_hi; the fiji run expects the shift plus flat_store_short.
; GCN-LABEL: {{^}}store_flat_hi_v2f16:
; GCN: s_waitcnt

; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2f16(half addrspace(4)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x half>
  %hi = extractelement <2 x half> %value, i32 1
  store half %hi, half addrspace(4)* %out
  ret void
}

; The high 16 bits are produced with lshr by 16 and a trunc to i16, then stored
; through an addrspace(4) pointer. The gfx900 run expects
; flat_store_short_d16_hi with no separate shift; the fiji run expects the
; shift plus flat_store_short.
; GCN-LABEL: {{^}}store_flat_hi_i32_shift:
; GCN: s_waitcnt

; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_i32_shift(i16 addrspace(4)* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i16
  store i16 %hi, i16 addrspace(4)* %out
  ret void
}

; Byte store of the high element: element 1 of the <2 x i16> is truncated to i8
; and stored through an addrspace(4) pointer. The gfx900 run expects
; flat_store_byte_d16_hi; the fiji run expects a 16-bit right shift followed by
; flat_store_byte.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_byte v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_i8(i8 addrspace(4)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  store i8 %trunc, i8 addrspace(4)* %out
  ret void
}

; Byte store where the source is formed by lshr 16 followed by a trunc directly
; to i8, stored through an addrspace(4) pointer. The gfx900 run expects
; flat_store_byte_d16_hi; the fiji run expects the shift plus flat_store_byte.
; GCN-LABEL: {{^}}store_flat_hi_i8_shift:
; GCN: s_waitcnt

; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_byte v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_i8_shift(i8 addrspace(4)* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i8
  store i8 %hi, i8 addrspace(4)* %out
  ret void
}

; High-half short store at i16 index 2047 (byte offset 4094) from an
; addrspace(4) pointer. The gfx900 run expects the offset folded into
; flat_store_short_d16_hi as offset:4094. The fiji run expects add / addc
; address arithmetic and the shift, in any order, followed by a flat_store_short
; with nothing after the data register.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}

; VI-DAG: v_add_i32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
; VI: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_max_offset(i16 addrspace(4)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 2047
  store i16 %hi, i16 addrspace(4)* %gep
  ret void
}

; High-half short store at i16 index -1023 (a negative byte offset) from an
; addrspace(4) pointer. Both runs expect explicit add / addc address arithmetic
; and a store with nothing after the data register, i.e. the negative offset is
; not folded into the flat store. The gfx900 run still expects the d16_hi form.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
; GCN: s_waitcnt
; GCN: v_add_i32_e32
; GCN: v_addc_u32_e32

; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
; VI: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_neg_offset(i16 addrspace(4)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 -1023
  store i16 %hi, i16 addrspace(4)* %gep
  ret void
}

; High-half byte store at byte offset 4095 from an addrspace(4) pointer. The
; gfx900 run expects flat_store_byte_d16_hi with offset:4095. The fiji run
; expects the shift and add / addc address arithmetic, in any order, followed
; by a flat_store_byte with nothing after the data register.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_max_offset:
; GCN: s_waitcnt
; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}

; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
; VI-DAG: v_add_i32_e32
; VI-DAG: v_addc_u32_e32
; VI: flat_store_byte v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_i8_max_offset(i8 addrspace(4)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 4095
  store i8 %trunc, i8 addrspace(4)* %gep
  ret void
}

; High-half byte store at byte offset -4095 from an addrspace(4) pointer. Both
; runs expect explicit add / addc address arithmetic and a store with nothing
; after the data register, i.e. the negative offset is not folded into the flat
; store. The gfx900 run expects flat_store_byte_d16_hi; the fiji run expects
; the shift plus flat_store_byte.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
; GCN: s_waitcnt
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_addc_u32_e32

; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
; VI: flat_store_byte v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_i8_neg_offset(i8 addrspace(4)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 -4095
  store i8 %trunc, i8 addrspace(4)* %gep
  ret void
}

; Stores element 1 of a <2 x i16> (bitcast from the i32 argument) through a
; default-address-space i16* pointer. The gfx900 run expects a single
; buffer_store_short_d16_hi using offen addressing with no immediate offset;
; the fiji run expects a 16-bit right shift and then a plain buffer_store_short.
; GCN-LABEL: {{^}}store_private_hi_v2i16:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16(i16* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  store i16 %hi, i16* %out
  ret void
}

; Half-precision variant: stores element 1 of a <2 x half> (bitcast from the
; i32 argument) through a default-address-space half* pointer. The gfx900 run
; expects buffer_store_short_d16_hi with offen; the fiji run expects the shift
; and then buffer_store_short.
; GCN-LABEL: {{^}}store_private_hi_v2f16:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2f16(half* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x half>
  %hi = extractelement <2 x half> %value, i32 1
  store half %hi, half* %out
  ret void
}

; The high 16 bits are produced with lshr by 16 and a trunc to i16, then stored
; through a default-address-space i16* pointer. The gfx900 run expects
; buffer_store_short_d16_hi with no separate shift; the fiji run expects the
; shift immediately followed by buffer_store_short.
; GCN-LABEL: {{^}}store_private_hi_i32_shift:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; VI-NEXT: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_i32_shift(i16* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i16
  store i16 %hi, i16* %out
  ret void
}

; Byte store of the high element: element 1 of the <2 x i16> is truncated to i8
; and stored through a default-address-space i8* pointer. The gfx900 run
; expects buffer_store_byte_d16_hi with offen; the fiji run expects a 16-bit
; right shift immediately followed by buffer_store_byte.
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; VI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_i8(i8* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  store i8 %trunc, i8* %out
  ret void
}

; Byte store where the source is formed by lshr 16 followed by a trunc directly
; to i8, stored through a default-address-space i8* pointer. The gfx900 run
; expects buffer_store_byte_d16_hi with offen; the fiji run expects the shift
; immediately followed by buffer_store_byte.
; GCN-LABEL: {{^}}store_private_hi_i8_shift:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; VI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_i8_shift(i8* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i8
  store i8 %hi, i8* %out
  ret void
}

; High-half short store at i16 index 2047 (byte offset 4094) from a
; default-address-space i16* pointer. Both runs expect the offset folded into
; the buffer store as offen offset:4094. The gfx900 run expects the d16_hi
; form; the fiji run expects the shift immediately followed by a plain
; buffer_store_short.
; GCN-LABEL: {{^}}store_private_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen offset:4094{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; VI-NEXT: buffer_store_short v1, v0, s[0:3], s4 offen offset:4094{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_max_offset(i16* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds i16, i16* %out, i64 2047
  store i16 %hi, i16* %gep
  ret void
}



; Volatile high-half short store to a null default-address-space pointer. With
; no pointer argument the data arrives in v0, and both runs expect the buffer
; store to use "off" instead of a VGPR address, with no offen or offset. The
; gfx900 run expects the d16_hi form; the fiji run expects the shift
; immediately followed by buffer_store_short.
; GCN-LABEL: {{^}}store_private_hi_v2i16_nooff:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s4{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; VI-NEXT: buffer_store_short v0, off, s[0:3], s4{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_nooff(i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  store volatile i16 %hi, i16* null
  ret void
}


; Volatile high-half byte store to a null default-address-space pointer. Both
; runs expect the buffer store to use "off" instead of a VGPR address, with no
; offen or offset. The gfx900 run expects buffer_store_byte_d16_hi; the fiji
; run expects the shift and then buffer_store_byte.
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_nooff:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s4{{$}}

; VI: v_lshrrev_b32_e32 v0, 16, v0
; VI: buffer_store_byte v0, off, s[0:3], s4{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_i8_nooff(i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  store volatile i8 %trunc, i8* null
  ret void
}

; Stores element 1 of a <2 x i16> (bitcast from the i32 argument) through an
; addrspace(3) pointer. The gfx900 run expects a single ds_write_b16_d16_hi;
; the fiji run expects a 16-bit right shift and then ds_write_b16.
; GCN-LABEL: {{^}}store_local_hi_v2i16:
; GCN: s_waitcnt

; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: ds_write_b16 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2i16(i16 addrspace(3)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  store i16 %hi, i16 addrspace(3)* %out
  ret void
}

; Half-precision variant: stores element 1 of a <2 x half> (bitcast from the
; i32 argument) through an addrspace(3) pointer. The gfx900 run expects
; ds_write_b16_d16_hi; the fiji run expects the shift and then ds_write_b16.
; GCN-LABEL: {{^}}store_local_hi_v2f16:
; GCN: s_waitcnt

; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: ds_write_b16 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2f16(half addrspace(3)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x half>
  %hi = extractelement <2 x half> %value, i32 1
  store half %hi, half addrspace(3)* %out
  ret void
}

; The high 16 bits are produced with lshr by 16 and a trunc to i16, then stored
; through an addrspace(3) pointer. The gfx900 run expects ds_write_b16_d16_hi
; with no separate shift; the fiji run expects the shift and then ds_write_b16.
; GCN-LABEL: {{^}}store_local_hi_i32_shift:
; GCN: s_waitcnt

; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: ds_write_b16 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_i32_shift(i16 addrspace(3)* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i16
  store i16 %hi, i16 addrspace(3)* %out
  ret void
}

; Byte store of the high element: element 1 of the <2 x i16> is truncated to i8
; and stored through an addrspace(3) pointer. The gfx900 run expects
; ds_write_b8_d16_hi; the fiji run expects a 16-bit right shift and then
; ds_write_b8.
; GCN-LABEL: {{^}}store_local_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX9-NEXT: ds_write_b8_d16_hi v0, v1{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: ds_write_b8 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2i16_i8(i8 addrspace(3)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  store i8 %trunc, i8 addrspace(3)* %out
  ret void
}

; High-half short store at i16 index 32767 (byte offset 65534) from an
; addrspace(3) pointer. Both runs expect the offset folded into the DS write as
; offset:65534. The gfx900 run expects ds_write_b16_d16_hi; the fiji run
; expects the shift and then ds_write_b16.
; GCN-LABEL: {{^}}store_local_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:65534{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: ds_write_b16 v0, v1 offset:65534{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2i16_max_offset(i16 addrspace(3)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds i16, i16 addrspace(3)* %out, i64 32767
  store i16 %hi, i16 addrspace(3)* %gep
  ret void
}

; High-half short store into a stack object. A [10 x i32] alloca receives a
; volatile store of 123, and the high element is stored to i16 index 2025 of a
; second [4096 x i16] alloca. The gfx900 run expects a buffer_store_dword
; immediately followed by buffer_store_short_d16_hi addressed with "off" and
; an immediate offset:4094. There are no fiji-specific checks for this test.
; GCN-LABEL: {{^}}store_private_hi_v2i16_to_offset:
; GCN: s_waitcnt
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094
define void @store_private_hi_v2i16_to_offset(i32 %arg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4
  %obj1 = alloca [4096 x i16], align 2
  %bc = bitcast [10 x i32]* %obj0 to i32*
  store volatile i32 123, i32* %bc
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds [4096 x i16], [4096 x i16]* %obj1, i32 0, i32 2025
  store i16 %hi, i16* %gep
  ret void
}

; High-half byte store into a stack object. A [10 x i32] alloca receives a
; volatile store of 123, and the truncated high element is stored to byte index
; 4051 of a second [4096 x i8] alloca. The gfx900 run expects a
; buffer_store_dword immediately followed by buffer_store_byte_d16_hi addressed
; with "off" and an immediate offset:4095. There are no fiji-specific checks
; for this test.
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_to_offset:
; GCN: s_waitcnt
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s5 offset:4095
define void @store_private_hi_v2i16_i8_to_offset(i32 %arg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4
  %obj1 = alloca [4096 x i8], align 2
  %bc = bitcast [10 x i32]* %obj0 to i32*
  store volatile i32 123, i32* %bc
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
  %trunc = trunc i16 %hi to i8
  store i8 %trunc, i8* %gep
  ret void
}

; Attribute group referenced as #0 by the test functions in this file.
attributes #0 = { nounwind }