blob: f7b66caf61286317bd60b40f0c93893de6110579 [file] [log] [blame]
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX900 %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX906,NO-D16-HI %s
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX803,NO-D16-HI %s
Matt Arsenaultfcc213f2017-09-20 03:20:09 +00004
; Global store of element 1 of the <2 x i16> view of %arg.
; gfx900 is expected to use the d16_hi store directly; the other targets
; shift the register right by 16 before a plain short store.
; GCN-LABEL: {{^}}store_global_hi_v2i16:
; GCN: s_waitcnt

; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX803-NEXT: flat_store_short v[0:1], v2
; GFX906-NEXT: global_store_short v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16(i16 addrspace(1)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  store i16 %upper, i16 addrspace(1)* %out
  ret void
}
24
; Global store of element 1 of the <2 x half> view of %arg.
; Same expectations as the i16 variant: d16_hi store on gfx900, shift plus
; plain short store elsewhere.
; GCN-LABEL: {{^}}store_global_hi_v2f16:
; GCN: s_waitcnt

; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX803-NEXT: flat_store_short v[0:1], v2
; GFX906-NEXT: global_store_short v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2f16(half addrspace(1)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %pair = bitcast i32 %arg to <2 x half>
  %upper = extractelement <2 x half> %pair, i32 1
  store half %upper, half addrspace(1)* %out
  ret void
}
44
; Global store of the high half of %value, written as lshr-by-16 followed by
; a truncate to i16 instead of a vector extract.
; GCN-LABEL: {{^}}store_global_hi_i32_shift:
; GCN: s_waitcnt

; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX803-NEXT: flat_store_short v[0:1], v2
; GFX906-NEXT: global_store_short v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_i32_shift(i16 addrspace(1)* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %upper = trunc i32 %shifted to i16
  store i16 %upper, i16 addrspace(1)* %out
  ret void
}
63
; Global byte store of the low 8 bits of element 1 of the <2 x i16> view of
; %arg. gfx900 is expected to use the byte d16_hi store.
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX803-NEXT: flat_store_byte v[0:1], v2
; GFX906-NEXT: global_store_byte v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_i8(i8 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %byte = trunc i16 %upper to i8
  store i8 %byte, i8 addrspace(1)* %out
  ret void
}
83
; Global byte store of bits 16..23 of %value, written as lshr-by-16 followed
; by a truncate to i8.
; GCN-LABEL: {{^}}store_global_hi_i8_shift:
; GCN: s_waitcnt

; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX803-NEXT: flat_store_byte v[0:1], v2
; GFX906-NEXT: global_store_byte v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_i8_shift(i8 addrspace(1)* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %byte = trunc i32 %shifted to i8
  store i8 %byte, i8 addrspace(1)* %out
  ret void
}
102
; Global high-half store at element index 2047 (byte offset 4094).
; gfx900 is expected to carry the offset on the d16_hi store; fiji is
; expected to compute the address with an add/addc pair.
; GCN-LABEL: {{^}}store_global_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094

; GFX803-DAG: v_add_u32_e32
; GFX803-DAG: v_addc_u32_e32
; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803: flat_store_short v[0:1], v2{{$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: global_store_short v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_max_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %addr = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 2047
  store i16 %upper, i16 addrspace(1)* %addr
  ret void
}
126
; Global high-half store at element index -2048 (byte offset -4096).
; gfx900 is expected to carry the negative offset on the d16_hi store.
; GCN-LABEL: {{^}}store_global_hi_v2i16_min_offset:
; GCN: s_waitcnt
; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}

; GFX803-DAG: v_add_u32_e32
; GFX803-DAG: v_addc_u32_e32
; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803: flat_store_short v[0:1], v{{[0-9]$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: global_store_short v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_min_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %addr = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 -2048
  store i16 %upper, i16 addrspace(1)* %addr
  ret void
}
149
; Global byte store of the truncated high half at byte offset 4095.
; gfx900 is expected to carry the offset on the byte d16_hi store.
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_max_offset:
; GCN: s_waitcnt
; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095

; GFX803-DAG: v_add_u32_e32
; GFX803-DAG: v_addc_u32_e32
; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803: flat_store_byte v[0:1], v{{[0-9]$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: global_store_byte v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_i8_max_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %byte = trunc i16 %upper to i8
  %addr = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4095
  store i8 %byte, i8 addrspace(1)* %addr
  ret void
}
173
; Global byte store of the truncated high half at byte offset -4095.
; gfx900 is expected to carry the negative offset on the byte d16_hi store.
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_min_offset:
; GCN: s_waitcnt
; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095

; GFX803-DAG: v_add_u32_e32
; GFX803-DAG: v_addc_u32_e32
; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803: flat_store_byte v[0:1], v{{[0-9]$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: global_store_byte v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_i8_min_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %byte = trunc i16 %upper to i8
  %addr = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 -4095
  store i8 %byte, i8 addrspace(1)* %addr
  ret void
}
197
; Flat (default address space) store of element 1 of the <2 x i16> view of
; %arg. gfx900 is expected to use the flat d16_hi store; the other targets
; shift right by 16 and use a plain flat short store.
; GCN-LABEL: {{^}}store_flat_hi_v2i16:
; GCN: s_waitcnt

; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; NO-D16-HI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16(i16* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  store i16 %upper, i16* %out
  ret void
}
215
; Flat store of element 1 of the <2 x half> view of %arg.
; Expectations match the i16 flat variant.
; GCN-LABEL: {{^}}store_flat_hi_v2f16:
; GCN: s_waitcnt

; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; NO-D16-HI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2f16(half* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x half>
  %upper = extractelement <2 x half> %pair, i32 1
  store half %upper, half* %out
  ret void
}
233
; Flat store of the high half of %value, written as lshr-by-16 followed by a
; truncate to i16.
; GCN-LABEL: {{^}}store_flat_hi_i32_shift:
; GCN: s_waitcnt

; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; NO-D16-HI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_i32_shift(i16* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %upper = trunc i32 %shifted to i16
  store i16 %upper, i16* %out
  ret void
}
251
; Flat byte store of the low 8 bits of element 1 of the <2 x i16> view of
; %arg. gfx900 is expected to use the flat byte d16_hi store.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; NO-D16-HI-NEXT: flat_store_byte v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_i8(i8* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %byte = trunc i16 %upper to i8
  store i8 %byte, i8* %out
  ret void
}
270
; Flat byte store of bits 16..23 of %value, written as lshr-by-16 followed by
; a truncate to i8.
; GCN-LABEL: {{^}}store_flat_hi_i8_shift:
; GCN: s_waitcnt

; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; NO-D16-HI-NEXT: flat_store_byte v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_i8_shift(i8* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %byte = trunc i32 %shifted to i8
  store i8 %byte, i8* %out
  ret void
}
288
; Flat high-half store at element index 2047 (byte offset 4094).
; gfx900 and gfx906 are expected to carry the offset on the store; fiji is
; expected to compute the address with an add/addc pair.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: flat_store_short v[0:1], v2 offset:4094

; GFX803-DAG: v_add_u32_e32
; GFX803-DAG: v_addc_u32_e32
; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803: flat_store_short v[0:1], v2{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_max_offset(i16* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %addr = getelementptr inbounds i16, i16* %out, i64 2047
  store i16 %upper, i16* %addr
  ret void
}
311
; Flat high-half store at element index -1023 (byte offset -2046).
; Every target is expected to compute the address with an add/addc pair and
; store without an immediate offset; gfx900 still uses the d16_hi store.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
; GCN: s_waitcnt
; GCN: v_add{{(_co)?}}_{{i|u}}32_e32

; GFX803: v_addc_u32_e32
; GFX900: v_addc_co_u32_e32

; GFX906-NEXT: v_lshrrev_b32_e32
; GFX906-NEXT: v_addc_co_u32_e32
; GFX906: flat_store_short v[0:1], v2

; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
; GFX803: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_neg_offset(i16* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %addr = getelementptr inbounds i16, i16* %out, i64 -1023
  store i16 %upper, i16* %addr
  ret void
}
335
; Flat byte store of the truncated high half at byte offset 4095.
; gfx900 and gfx906 are expected to carry the offset on the store; fiji is
; expected to compute the address with an add/addc pair.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_max_offset:
; GCN: s_waitcnt
; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}

; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803-DAG: v_add_u32_e32
; GFX803-DAG: v_addc_u32_e32
; GFX803: flat_store_byte v[0:1], v2{{$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: flat_store_byte v[0:1], v2 offset:4095{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_i8_max_offset(i8* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %byte = trunc i16 %upper to i8
  %addr = getelementptr inbounds i8, i8* %out, i64 4095
  store i8 %byte, i8* %addr
  ret void
}
359
; Flat byte store of the truncated high half at byte offset -4095.
; Every target is expected to compute the address with an add/addc pair and
; store without an immediate offset; gfx900 still uses the byte d16_hi store.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
; GCN: s_waitcnt
; GCN-DAG: v_add{{(_co)?}}_{{i|u}}32_e32

; GFX803-DAG: v_addc_u32_e32
; GFX900-DAG: v_addc_co_u32_e32
; GFX906-DAG: v_add_co_u32_e32

; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: v_addc_co_u32_e32
; GFX906-NEXT: flat_store_byte v[0:1], v2{{$}}

; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803: flat_store_byte v[0:1], v2{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_i8_neg_offset(i8* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %byte = trunc i16 %upper to i8
  %addr = getelementptr inbounds i8, i8* %out, i64 -4095
  store i8 %byte, i8* %addr
  ret void
}
388
; Private (addrspace(5)) store of element 1 of the <2 x i16> view of %arg.
; gfx900 is expected to use the buffer d16_hi store with offen addressing;
; the other targets shift right by 16 and use a plain buffer short store.
; GCN-LABEL: {{^}}store_private_hi_v2i16:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16(i16 addrspace(5)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  store i16 %upper, i16 addrspace(5)* %out
  ret void
}
407
; Private store of element 1 of the <2 x half> view of %arg.
; Expectations match the i16 private variant.
; GCN-LABEL: {{^}}store_private_hi_v2f16:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2f16(half addrspace(5)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %pair = bitcast i32 %arg to <2 x half>
  %upper = extractelement <2 x half> %pair, i32 1
  store half %upper, half addrspace(5)* %out
  ret void
}
426
; Private store of the high half of %value, written as lshr-by-16 followed by
; a truncate to i16.
; GCN-LABEL: {{^}}store_private_hi_i32_shift:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI-NEXT: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_i32_shift(i16 addrspace(5)* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %upper = trunc i32 %shifted to i16
  store i16 %upper, i16 addrspace(5)* %out
  ret void
}
444
; Private byte store of the low 8 bits of element 1 of the <2 x i16> view of
; %arg. gfx900 is expected to use the buffer byte d16_hi store.
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_i8(i8 addrspace(5)* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %byte = trunc i16 %upper to i8
  store i8 %byte, i8 addrspace(5)* %out
  ret void
}
463
; Private byte store of bits 16..23 of %value, written as lshr-by-16 followed
; by a truncate to i8.
; GCN-LABEL: {{^}}store_private_hi_i8_shift:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_i8_shift(i8 addrspace(5)* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %byte = trunc i32 %shifted to i8
  store i8 %byte, i8 addrspace(5)* %out
  ret void
}
481
; Private high-half store through a byval pointer argument at element index
; 2045. All targets are expected to store with an immediate offset of 4094
; and no VGPR address (off); gfx900 uses the d16_hi form.
; GCN-LABEL: {{^}}store_private_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX900: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v0, 16, v0
; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], s5 offset:4094{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_max_offset(i16 addrspace(5)* byval %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %addr = getelementptr inbounds i16, i16 addrspace(5)* %out, i64 2045
  store i16 %upper, i16 addrspace(5)* %addr
  ret void
}
499
500
501
; Volatile private high-half store to the null addrspace(5) pointer.
; The store is expected to use no VGPR address and no immediate offset;
; gfx900 uses the d16_hi form.
; GCN-LABEL: {{^}}store_private_hi_v2i16_nooff:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s4{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], s4{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_nooff(i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  store volatile i16 %upper, i16 addrspace(5)* null
  ret void
}
520
521
; Volatile private byte store of the truncated high half to the null
; addrspace(5) pointer. The store is expected to use no VGPR address and no
; immediate offset; gfx900 uses the byte d16_hi form.
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_nooff:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s4{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v0, 16, v0
; NO-D16-HI: buffer_store_byte v0, off, s[0:3], s4{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_i8_nooff(i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %byte = trunc i16 %upper to i8
  store volatile i8 %byte, i8 addrspace(5)* null
  ret void
}
540
; Local (addrspace(3)) store of element 1 of the <2 x i16> view of %arg.
; gfx900 is expected to use the ds d16_hi write; the other targets shift
; right by 16 and use a plain 16-bit ds write.
; GCN-LABEL: {{^}}store_local_hi_v2i16:
; GCN: s_waitcnt

; GFX900-NEXT: ds_write_b16_d16_hi v0, v1{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: ds_write_b16 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2i16(i16 addrspace(3)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  store i16 %upper, i16 addrspace(3)* %out
  ret void
}
559
; Local store of element 1 of the <2 x half> view of %arg.
; Expectations match the i16 local variant.
; GCN-LABEL: {{^}}store_local_hi_v2f16:
; GCN: s_waitcnt

; GFX900-NEXT: ds_write_b16_d16_hi v0, v1{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: ds_write_b16 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2f16(half addrspace(3)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %pair = bitcast i32 %arg to <2 x half>
  %upper = extractelement <2 x half> %pair, i32 1
  store half %upper, half addrspace(3)* %out
  ret void
}
578
; Local store of the high half of %value, written as lshr-by-16 followed by a
; truncate to i16.
; GCN-LABEL: {{^}}store_local_hi_i32_shift:
; GCN: s_waitcnt

; GFX900-NEXT: ds_write_b16_d16_hi v0, v1{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: ds_write_b16 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_i32_shift(i16 addrspace(3)* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %upper = trunc i32 %shifted to i16
  store i16 %upper, i16 addrspace(3)* %out
  ret void
}
596
; Local byte store of the low 8 bits of element 1 of the <2 x i16> view of
; %arg. gfx900 is expected to use the 8-bit ds d16_hi write.
; GCN-LABEL: {{^}}store_local_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX900-NEXT: ds_write_b8_d16_hi v0, v1{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: ds_write_b8 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2i16_i8(i8 addrspace(3)* %out, i32 %arg) #0 {
entry:
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %byte = trunc i16 %upper to i8
  store i8 %byte, i8 addrspace(3)* %out
  ret void
}
615
; Local high-half store at element index 32767 (byte offset 65534).
; All targets are expected to carry the offset on the ds write; gfx900 uses
; the d16_hi form.
; GCN-LABEL: {{^}}store_local_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX900-NEXT: ds_write_b16_d16_hi v0, v1 offset:65534{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: ds_write_b16 v0, v1 offset:65534{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2i16_max_offset(i16 addrspace(3)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %addr = getelementptr inbounds i16, i16 addrspace(3)* %out, i64 32767
  store i16 %upper, i16 addrspace(3)* %addr
  ret void
}
634
; High-half store into a private alloca, preceded by a volatile dword store
; into a separate [10 x i32] alloca. Only gfx900 output is checked here: the
; dword store must be followed directly by a d16_hi short store with an
; immediate offset of 4094.
; GCN-LABEL: {{^}}store_private_hi_v2i16_to_offset:
; GCN: s_waitcnt
; GFX900: buffer_store_dword
; GFX900-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094
define void @store_private_hi_v2i16_to_offset(i32 %arg) #0 {
entry:
  %pad = alloca [10 x i32], align 4, addrspace(5)
  %buf = alloca [4096 x i16], align 2, addrspace(5)
  %pad.i32 = bitcast [10 x i32] addrspace(5)* %pad to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %pad.i32
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %addr = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %buf, i32 0, i32 2025
  store i16 %upper, i16 addrspace(5)* %addr
  ret void
}
651
; Byte store of the truncated high half into a private alloca, preceded by a
; volatile dword store into a separate [10 x i32] alloca. Only gfx900 output
; is checked here: the dword store must be followed directly by a d16_hi byte
; store with an immediate offset of 4095.
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_to_offset:
; GCN: s_waitcnt
; GFX900: buffer_store_dword
; GFX900-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s5 offset:4095
define void @store_private_hi_v2i16_i8_to_offset(i32 %arg) #0 {
entry:
  %pad = alloca [10 x i32], align 4, addrspace(5)
  %buf = alloca [4096 x i8], align 2, addrspace(5)
  %pad.i32 = bitcast [10 x i32] addrspace(5)* %pad to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %pad.i32
  %pair = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %pair, i32 1
  %addr = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %buf, i32 0, i32 4051
  %byte = trunc i16 %upper to i8
  store i8 %byte, i8 addrspace(5)* %addr
  ret void
}
669
; Attribute group referenced as #0 by the function definitions in this file.
attributes #0 = { nounwind }