; blob: 2d6c72bce0431612811f2f31caba461919883649
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s

; Stores element 1 of %arg viewed as <2 x i16> (its upper 16 bits) through a
; global (addrspace(1)) pointer. On gfx900 a single d16_hi short store is
; expected; on fiji a right shift by 16 followed by a plain flat short store.
; GCN-LABEL: {{^}}store_global_hi_v2i16:
; GCN: s_waitcnt

; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16(i16 addrspace(1)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  store i16 %hi, i16 addrspace(1)* %out
  ret void
}
22
; Same as the <2 x i16> global case but the stored element is a half taken
; from %arg viewed as <2 x half>. The expected instructions are identical:
; a d16_hi short store on gfx900, shift-then-store on fiji.
; GCN-LABEL: {{^}}store_global_hi_v2f16:
; GCN: s_waitcnt

; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2f16(half addrspace(1)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x half>
  %hi = extractelement <2 x half> %value, i32 1
  store half %hi, half addrspace(1)* %out
  ret void
}
41
; Expresses the high half as a scalar pattern (lshr by 16, then trunc to i16)
; instead of a vector extract, and stores it to a global pointer. The expected
; output matches the vector form: d16_hi store on gfx900, shift + store on fiji.
; GCN-LABEL: {{^}}store_global_hi_i32_shift:
; GCN: s_waitcnt

; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_i32_shift(i16 addrspace(1)* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i16
  store i16 %hi, i16 addrspace(1)* %out
  ret void
}
59
; Extracts element 1 of %arg as <2 x i16>, truncates it to i8 and stores the
; byte to a global pointer. gfx900 is expected to use the d16_hi byte store;
; fiji shifts right by 16 and uses a plain flat byte store.
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_byte v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_i8(i8 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  store i8 %trunc, i8 addrspace(1)* %out
  ret void
}
78
; Scalar form of the byte case: lshr by 16, trunc straight to i8, store to a
; global pointer. Expected output is the d16_hi byte store on gfx900 and a
; shift followed by a flat byte store on fiji.
; GCN-LABEL: {{^}}store_global_hi_i8_shift:
; GCN: s_waitcnt

; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_byte v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_i8_shift(i8 addrspace(1)* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i8
  store i8 %hi, i8 addrspace(1)* %out
  ret void
}
96
; High-half i16 store to a global pointer advanced by 2047 elements (4094
; bytes). gfx900 is expected to fold the displacement into the store's
; immediate offset; fiji is expected to compute the address with an add/addc
; pair and store with no offset.
; GCN-LABEL: {{^}}store_global_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094

; VI-DAG: v_add_u32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2

; VI: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_max_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 2047
  store i16 %hi, i16 addrspace(1)* %gep
  ret void
}
117
; High-half i16 store to a global pointer moved back by 2048 elements (-4096
; bytes). gfx900 is expected to encode the negative displacement as the
; store's immediate offset; fiji materializes the address with add/addc.
; GCN-LABEL: {{^}}store_global_hi_v2i16_min_offset:
; GCN: s_waitcnt
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}

; VI-DAG: v_add_u32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2

; VI: flat_store_short v[0:1], v{{[0-9]$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_min_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 -2048
  store i16 %hi, i16 addrspace(1)* %gep
  ret void
}
137
; Byte variant of the positive-offset global case: the truncated high half is
; stored 4095 bytes past %out. gfx900 is expected to fold the 4095 into the
; d16_hi byte store; fiji computes the address with add/addc first.
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_max_offset:
; GCN: s_waitcnt
; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095

; VI-DAG: v_add_u32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
; VI: flat_store_byte v[0:1], v{{[0-9]$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_i8_max_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4095
  store i8 %trunc, i8 addrspace(1)* %gep
  ret void
}
158
; Byte variant of the negative-offset global case: the truncated high half is
; stored 4095 bytes before %out. gfx900 is expected to fold -4095 into the
; d16_hi byte store; fiji computes the address with add/addc first.
; NOTE(review): despite the "min_offset" name this uses -4095, one byte above
; the -4096 exercised by the i16 min-offset test; confirm that is intentional.
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_min_offset:
; GCN: s_waitcnt
; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095

; VI-DAG: v_add_u32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2

; VI: flat_store_byte v[0:1], v{{[0-9]$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_i8_min_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 -4095
  store i8 %trunc, i8 addrspace(1)* %gep
  ret void
}
179
; Stores element 1 of %arg viewed as <2 x i16> through a pointer in the
; default address space. gfx900 is expected to emit the flat d16_hi short
; store with no offset; fiji shifts right by 16 and uses a plain flat store.
; GCN-LABEL: {{^}}store_flat_hi_v2i16:
; GCN: s_waitcnt

; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16(i16* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  store i16 %hi, i16* %out
  ret void
}
197
; Half-typed version of the flat high-half store: element 1 of %arg viewed as
; <2 x half> is written through a default-address-space pointer. Expected
; instructions are the same as for the i16 flat case.
; GCN-LABEL: {{^}}store_flat_hi_v2f16:
; GCN: s_waitcnt

; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2f16(half* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x half>
  %hi = extractelement <2 x half> %value, i32 1
  store half %hi, half* %out
  ret void
}
215
; Scalar form for flat stores: lshr by 16 and trunc to i16, written through a
; default-address-space pointer. gfx900 is expected to emit the flat d16_hi
; short store; fiji keeps the explicit shift before a plain flat store.
; GCN-LABEL: {{^}}store_flat_hi_i32_shift:
; GCN: s_waitcnt

; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_i32_shift(i16* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i16
  store i16 %hi, i16* %out
  ret void
}
233
; Extracts element 1 of %arg as <2 x i16>, truncates to i8 and stores the byte
; through a default-address-space pointer. gfx900 is expected to emit the flat
; d16_hi byte store; fiji shifts right by 16 and uses a plain flat byte store.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_byte v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_i8(i8* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  store i8 %trunc, i8* %out
  ret void
}
252
; Scalar byte form for flat stores: lshr by 16, trunc to i8, store through a
; default-address-space pointer. Expected output is the flat d16_hi byte store
; on gfx900 and shift + plain flat byte store on fiji.
; GCN-LABEL: {{^}}store_flat_hi_i8_shift:
; GCN: s_waitcnt

; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; VI-NEXT: flat_store_byte v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_i8_shift(i8* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i8
  store i8 %hi, i8* %out
  ret void
}
270
; High-half i16 store through a default-address-space pointer advanced by 2047
; elements (4094 bytes). gfx900 is expected to fold the displacement into the
; flat store's immediate offset; fiji computes the address with add/addc and
; stores with no offset.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}

; VI-DAG: v_add_u32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
; VI: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_max_offset(i16* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds i16, i16* %out, i64 2047
  store i16 %hi, i16* %gep
  ret void
}
289
; High-half i16 store through a default-address-space pointer moved back by
; 1023 elements (-2046 bytes). Both targets are expected to compute the
; address with an add/addc pair and store with no immediate offset; gfx900
; still uses the d16_hi store.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
; GCN: s_waitcnt
; GCN: v_add{{(_co)?}}_{{i|u}}32_e32
; VI: v_addc_u32_e32
; GFX9: v_addc_co_u32_e32

; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
; VI: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_neg_offset(i16* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds i16, i16* %out, i64 -1023
  store i16 %hi, i16* %gep
  ret void
}
308
; Byte variant of the positive-offset flat case: the truncated high half is
; stored 4095 bytes past %out. gfx900 is expected to fold 4095 into the flat
; d16_hi byte store; fiji computes the address with add/addc first.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_max_offset:
; GCN: s_waitcnt
; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}

; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
; VI-DAG: v_add_u32_e32
; VI-DAG: v_addc_u32_e32
; VI: flat_store_byte v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_i8_max_offset(i8* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  %gep = getelementptr inbounds i8, i8* %out, i64 4095
  store i8 %trunc, i8* %gep
  ret void
}
328
; Byte variant of the negative-offset flat case: the truncated high half is
; stored 4095 bytes before %out. Both targets are expected to compute the
; address with add/addc and store with no immediate offset; gfx900 still uses
; the d16_hi byte store.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
; GCN: s_waitcnt
; GCN-DAG: v_add{{(_co)?}}_{{i|u}}32_e32
; VI-DAG: v_addc_u32_e32
; GFX9-DAG: v_addc_co_u32_e32

; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
; VI: flat_store_byte v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_i8_neg_offset(i8* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  %gep = getelementptr inbounds i8, i8* %out, i64 -4095
  store i8 %trunc, i8* %gep
  ret void
}
349
; Stores element 1 of %arg viewed as <2 x i16> through a private
; (addrspace(5)) pointer. gfx900 is expected to emit the buffer d16_hi short
; store with offen addressing; fiji shifts right by 16 and then uses the plain
; buffer short store.
; GCN-LABEL: {{^}}store_private_hi_v2i16:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16(i16 addrspace(5)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  store i16 %hi, i16 addrspace(5)* %out
  ret void
}
368
; Half-typed version of the private high-half store: element 1 of %arg viewed
; as <2 x half> is written through an addrspace(5) pointer. Expected
; instructions match the i16 private case.
; GCN-LABEL: {{^}}store_private_hi_v2f16:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2f16(half addrspace(5)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x half>
  %hi = extractelement <2 x half> %value, i32 1
  store half %hi, half addrspace(5)* %out
  ret void
}
387
; Scalar form for private stores: lshr by 16 and trunc to i16, written through
; an addrspace(5) pointer. gfx900 is expected to emit the buffer d16_hi short
; store; fiji keeps the shift before the plain buffer short store.
; GCN-LABEL: {{^}}store_private_hi_i32_shift:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; VI-NEXT: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_i32_shift(i16 addrspace(5)* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i16
  store i16 %hi, i16 addrspace(5)* %out
  ret void
}
405
; Extracts element 1 of %arg as <2 x i16>, truncates to i8 and stores the byte
; through an addrspace(5) pointer. gfx900 is expected to emit the buffer
; d16_hi byte store; fiji shifts right by 16 and uses the plain byte store.
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; VI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_i8(i8 addrspace(5)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  store i8 %trunc, i8 addrspace(5)* %out
  ret void
}
424
; Scalar byte form for private stores: lshr by 16, trunc to i8, store through
; an addrspace(5) pointer. Expected output is the buffer d16_hi byte store on
; gfx900 and shift + plain buffer byte store on fiji.
; GCN-LABEL: {{^}}store_private_hi_i8_shift:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; VI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_i8_shift(i8 addrspace(5)* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i8
  store i8 %hi, i8 addrspace(5)* %out
  ret void
}
442
; High-half i16 store into a byval addrspace(5) argument, 2045 elements (4090
; bytes) past its start. Both targets are expected to address the slot with an
; immediate offset of 4094 and no VGPR address; gfx900 uses the d16_hi store.
; NOTE(review): the extra 4 bytes are presumably the byval argument's own
; frame offset - confirm against the calling convention.
; GCN-LABEL: {{^}}store_private_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX9: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094{{$}}

; VI: v_lshrrev_b32_e32 v0, 16, v0
; VI-NEXT: buffer_store_short v0, off, s[0:3], s5 offset:4094{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_max_offset(i16 addrspace(5)* byval %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds i16, i16 addrspace(5)* %out, i64 2045
  store i16 %hi, i16 addrspace(5)* %gep
  ret void
}
460
461
462
; Volatile high-half i16 store to the null addrspace(5) pointer. The expected
; buffer store uses "off" addressing with no VGPR address and no immediate
; offset; gfx900 uses the d16_hi form, fiji shifts first.
; GCN-LABEL: {{^}}store_private_hi_v2i16_nooff:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s4{{$}}

; VI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; VI-NEXT: buffer_store_short v0, off, s[0:3], s4{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_nooff(i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  store volatile i16 %hi, i16 addrspace(5)* null
  ret void
}
481
482
; Volatile byte store of the truncated high half to the null addrspace(5)
; pointer. The expected buffer byte store uses "off" addressing with no
; immediate offset; gfx900 uses the d16_hi form, fiji shifts first.
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_nooff:
; GCN: s_waitcnt

; GFX9-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s4{{$}}

; VI: v_lshrrev_b32_e32 v0, 16, v0
; VI: buffer_store_byte v0, off, s[0:3], s4{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_i8_nooff(i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  store volatile i8 %trunc, i8 addrspace(5)* null
  ret void
}
501
; Stores element 1 of %arg viewed as <2 x i16> through a local (addrspace(3))
; pointer. gfx900 is expected to emit the d16_hi 16-bit DS write; fiji shifts
; right by 16 and uses the plain 16-bit DS write.
; GCN-LABEL: {{^}}store_local_hi_v2i16:
; GCN: s_waitcnt

; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: ds_write_b16 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2i16(i16 addrspace(3)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  store i16 %hi, i16 addrspace(3)* %out
  ret void
}
520
; Half-typed version of the local high-half store: element 1 of %arg viewed as
; <2 x half> is written through an addrspace(3) pointer. Expected instructions
; match the i16 local case.
; GCN-LABEL: {{^}}store_local_hi_v2f16:
; GCN: s_waitcnt

; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: ds_write_b16 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2f16(half addrspace(3)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x half>
  %hi = extractelement <2 x half> %value, i32 1
  store half %hi, half addrspace(3)* %out
  ret void
}
539
; Scalar form for local stores: lshr by 16 and trunc to i16, written through
; an addrspace(3) pointer. gfx900 is expected to emit the d16_hi DS write;
; fiji keeps the shift before the plain DS write.
; GCN-LABEL: {{^}}store_local_hi_i32_shift:
; GCN: s_waitcnt

; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: ds_write_b16 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_i32_shift(i16 addrspace(3)* %out, i32 %value) #0 {
entry:
  %hi32 = lshr i32 %value, 16
  %hi = trunc i32 %hi32 to i16
  store i16 %hi, i16 addrspace(3)* %out
  ret void
}
557
; Extracts element 1 of %arg as <2 x i16>, truncates to i8 and stores the byte
; through an addrspace(3) pointer. gfx900 is expected to emit the d16_hi 8-bit
; DS write; fiji shifts right by 16 and uses the plain 8-bit DS write.
; GCN-LABEL: {{^}}store_local_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX9-NEXT: ds_write_b8_d16_hi v0, v1{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: ds_write_b8 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2i16_i8(i8 addrspace(3)* %out, i32 %arg) #0 {
entry:
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %trunc = trunc i16 %hi to i8
  store i8 %trunc, i8 addrspace(3)* %out
  ret void
}
576
; High-half i16 store through an addrspace(3) pointer advanced by 32767
; elements (65534 bytes). Both targets are expected to fold the displacement
; into the DS write's immediate offset; gfx900 uses the d16_hi form.
; GCN-LABEL: {{^}}store_local_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:65534{{$}}

; VI: v_lshrrev_b32_e32 v1, 16, v1
; VI: ds_write_b16 v0, v1 offset:65534{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2i16_max_offset(i16 addrspace(3)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds i16, i16 addrspace(3)* %out, i64 32767
  store i16 %hi, i16 addrspace(3)* %gep
  ret void
}
595
; High-half i16 store into element 2025 of a second stack object, placed after
; a 40-byte first object that receives a volatile i32 store. Only gfx900
; output is checked beyond the initial wait: a dword store followed directly
; by the d16_hi short store addressed with an immediate offset of 4094.
; NOTE(review): 4094 = 2025*2 + 44; the 44 is presumably the 40-byte first
; object plus a 4-byte frame base - confirm against the frame layout.
; GCN-LABEL: {{^}}store_private_hi_v2i16_to_offset:
; GCN: s_waitcnt
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094
define void @store_private_hi_v2i16_to_offset(i32 %arg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4, addrspace(5)
  %obj1 = alloca [4096 x i16], align 2, addrspace(5)
  %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %bc
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2025
  store i16 %hi, i16 addrspace(5)* %gep
  ret void
}
612
; Byte variant of the stack-object case: the truncated high half is stored to
; element 4051 of a second stack object that follows a 40-byte first object.
; Only gfx900 output is checked beyond the initial wait: a dword store
; followed directly by the d16_hi byte store with an immediate offset of 4095.
; NOTE(review): 4095 = 4051 + 44; the 44 is presumably the 40-byte first
; object plus a 4-byte frame base - confirm against the frame layout.
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_to_offset:
; GCN: s_waitcnt
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s5 offset:4095
define void @store_private_hi_v2i16_i8_to_offset(i32 %arg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4, addrspace(5)
  %obj1 = alloca [4096 x i8], align 2, addrspace(5)
  %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %bc
  %value = bitcast i32 %arg to <2 x i16>
  %hi = extractelement <2 x i16> %value, i32 1
  %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
  %trunc = trunc i16 %hi to i8
  store i8 %trunc, i8 addrspace(5)* %gep
  ret void
}
630
; Attribute group referenced as #0 by every function in this file.
attributes #0 = { nounwind }