blob: 62d372a398aa668c56bd0bbbaa5bd1da7a69aa0c [file] [log] [blame]
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s

; Run on devices with different unaligned load restrictions.

; TODO: Vector element tests
; TODO: Non-zero base offset for load and store combinations
; TODO: Same base addrspacecasted
9
10
11; GCN-LABEL: {{^}}merge_global_store_2_constants_i8:
12; GCN: buffer_store_byte
13; GCN: buffer_store_byte
14; GCN: s_endpgm
define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 {
  ; Two adjacent constant byte stores to global memory, higher address first.
  ; The base store is 2-byte aligned; the checks above still expect two
  ; separate byte stores.
  %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1

  store i8 123, i8 addrspace(1)* %out.gep.1
  ; 456 does not fit in i8; spell out the value it wraps to (0xC8 == -56)
  ; instead of relying on the parser to truncate it.
  store i8 -56, i8 addrspace(1)* %out, align 2
  ret void
}
22
23; GCN-LABEL: {{^}}merge_global_store_2_constants_i8_natural_align:
24; GCN: buffer_store_byte
25; GCN: buffer_store_byte
26; GCN: s_endpgm
define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 {
  ; Same pair of constant byte stores as the align-2 variant, but with no
  ; explicit alignment on either store.
  %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1

  store i8 123, i8 addrspace(1)* %out.gep.1
  ; 456 is out of range for i8; write the value it wraps to (0xC8 == -56).
  store i8 -56, i8 addrspace(1)* %out
  ret void
}
34
35; GCN-LABEL: {{^}}merge_global_store_2_constants_i16:
36; GCN: buffer_store_dword v
define void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 {
  ; Adjacent constant halfword stores; the base store carries align 4 and the
  ; check above expects one dword store.
  %dst1 = getelementptr i16, i16 addrspace(1)* %out, i32 1

  store i16 123, i16 addrspace(1)* %dst1
  store i16 456, i16 addrspace(1)* %out, align 4
  ret void
}
44
45; GCN-LABEL: {{^}}merge_global_store_2_constants_0_i16:
46; GCN: buffer_store_dword v
define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 {
  ; Zero-valued variant of the aligned i16 pair: both halfwords are 0 and the
  ; base store carries align 4.
  %dst1 = getelementptr i16, i16 addrspace(1)* %out, i32 1

  store i16 0, i16 addrspace(1)* %dst1
  store i16 0, i16 addrspace(1)* %out, align 4
  ret void
}
54
55; GCN-LABEL: {{^}}merge_global_store_2_constants_i16_natural_align:
56; GCN: buffer_store_short
57; GCN: buffer_store_short
58; GCN: s_endpgm
define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 {
  ; Adjacent constant halfword stores with no explicit alignment; the checks
  ; above expect two separate short stores.
  %dst1 = getelementptr i16, i16 addrspace(1)* %out, i32 1

  store i16 123, i16 addrspace(1)* %dst1
  store i16 456, i16 addrspace(1)* %out
  ret void
}
66
67; GCN-LABEL: {{^}}merge_global_store_2_constants_i32:
68; SI-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8
69; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b
70; SI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]]
71; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]]
72; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 {
  ; Two neighbouring constant dword stores, element 1 written first. The
  ; checks above expect one dwordx2 store with 456 (0x1c8) in the low half
  ; and 123 (0x7b) in the high half.
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 1

  store i32 123, i32 addrspace(1)* %dst1
  store i32 456, i32 addrspace(1)* %out
  ret void
}
80
81; GCN-LABEL: {{^}}merge_global_store_2_constants_i32_f32:
82; GCN: buffer_store_dwordx2
define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 {
  ; Element 1 is written as a float through a bitcast pointer, element 0 as
  ; an i32; the check above expects one dwordx2 store.
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  %dst1.as.f32 = bitcast i32 addrspace(1)* %dst1 to float addrspace(1)*
  store float 1.0, float addrspace(1)* %dst1.as.f32
  store i32 456, i32 addrspace(1)* %out
  ret void
}
90
91; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32:
; SI-DAG: s_mov_b32 [[SLO:s[0-9]+]], 4.0
93; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b{{$}}
94; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[SLO]]
95; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[SHI]]
96; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 {
  ; Float-typed base pointer: element 1 is written as an i32 through a
  ; bitcast pointer, element 0 as a float.
  %dst1 = getelementptr float, float addrspace(1)* %out, i32 1
  %dst1.as.i32 = bitcast float addrspace(1)* %dst1 to i32 addrspace(1)*
  store i32 123, i32 addrspace(1)* %dst1.as.i32
  store float 4.0, float addrspace(1)* %out
  ret void
}
104
105; GCN-LABEL: {{^}}merge_global_store_4_constants_i32:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x14d{{$}}
107; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x1c8{{$}}
108; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x7b{{$}}
109; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x4d2{{$}}
110; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 {
  ; Four consecutive constant dwords; elements 1..3 are stored first and the
  ; base element last. The checks above expect a single dwordx4 store.
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  %dst2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
  %dst3 = getelementptr i32, i32 addrspace(1)* %out, i32 3

  store i32 123, i32 addrspace(1)* %dst1
  store i32 456, i32 addrspace(1)* %dst2
  store i32 333, i32 addrspace(1)* %dst3
  store i32 1234, i32 addrspace(1)* %out
  ret void
}
122
123; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order:
; GCN: buffer_store_dwordx4
define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 {
  ; Four consecutive constant floats stored in ascending address order.
  %dst1 = getelementptr float, float addrspace(1)* %out, i32 1
  %dst2 = getelementptr float, float addrspace(1)* %out, i32 2
  %dst3 = getelementptr float, float addrspace(1)* %out, i32 3

  store float 8.0, float addrspace(1)* %out
  store float 1.0, float addrspace(1)* %dst1
  store float 2.0, float addrspace(1)* %dst2
  store float 4.0, float addrspace(1)* %dst3
  ret void
}
136
; The store to the first element (%out) is issued last, out of address order.
; GCN-LABEL: {{^}}merge_global_store_4_constants_f32:
; GCN: buffer_store_dwordx4
define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
  ; Same four floats as the *_f32_order test, but the base element is
  ; written after elements 1..3.
  %dst1 = getelementptr float, float addrspace(1)* %out, i32 1
  %dst2 = getelementptr float, float addrspace(1)* %out, i32 2
  %dst3 = getelementptr float, float addrspace(1)* %out, i32 3

  store float 1.0, float addrspace(1)* %dst1
  store float 2.0, float addrspace(1)* %dst2
  store float 4.0, float addrspace(1)* %dst3
  store float 8.0, float addrspace(1)* %out
  ret void
}
151
; FIXME: Should be able to merge this
153; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
154; XGCN: buffer_store_dwordx4
155; GCN: buffer_store_dword
156; GCN: buffer_store_dword
157; GCN: buffer_store_dword
158; GCN: buffer_store_dword
159; GCN: s_endpgm
define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
  ; Four consecutive dword slots with alternating types: slots 1 and 3 get
  ; i32 constants through bitcast pointers, slots 0 and 2 get floats. The
  ; checks above currently expect four separate dword stores.
  %dst1 = getelementptr float, float addrspace(1)* %out, i32 1
  %dst2 = getelementptr float, float addrspace(1)* %out, i32 2
  %dst3 = getelementptr float, float addrspace(1)* %out, i32 3

  %dst1.as.i32 = bitcast float addrspace(1)* %dst1 to i32 addrspace(1)*
  %dst3.as.i32 = bitcast float addrspace(1)* %dst3 to i32 addrspace(1)*

  store i32 11, i32 addrspace(1)* %dst1.as.i32
  store float 2.0, float addrspace(1)* %dst2
  store i32 17, i32 addrspace(1)* %dst3.as.i32
  store float 8.0, float addrspace(1)* %out
  ret void
}
174
; GCN-LABEL: {{^}}merge_global_store_3_constants_i32:
176; SI-DAG: buffer_store_dwordx2
177; SI-DAG: buffer_store_dword
178; SI-NOT: buffer_store_dword
179; GCN: s_endpgm
define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 {
  ; Three consecutive constant dwords; the checks above expect one dwordx2
  ; store plus one dword store and nothing further.
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  %dst2 = getelementptr i32, i32 addrspace(1)* %out, i32 2

  store i32 123, i32 addrspace(1)* %dst1
  store i32 456, i32 addrspace(1)* %dst2
  store i32 1234, i32 addrspace(1)* %out
  ret void
}
189
190; GCN-LABEL: {{^}}merge_global_store_2_constants_i64:
191; XGCN: buffer_store_dwordx4
192; GCN: buffer_store_dwordx2
193; GCN: buffer_store_dwordx2
define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 {
  ; Two adjacent constant i64 stores; the active checks above expect two
  ; dwordx2 stores (the dwordx4 expectation is disabled).
  %dst1 = getelementptr i64, i64 addrspace(1)* %out, i64 1

  store i64 123, i64 addrspace(1)* %dst1
  store i64 456, i64 addrspace(1)* %out
  ret void
}
201
202; GCN-LABEL: {{^}}merge_global_store_4_constants_i64:
203; XGCN: buffer_store_dwordx4
204; XGCN: buffer_store_dwordx4
205
206; GCN: buffer_store_dwordx2
207; GCN: buffer_store_dwordx2
208; GCN: buffer_store_dwordx2
209; GCN: buffer_store_dwordx2
define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 {
  ; Four consecutive constant i64 stores, base element last; the active
  ; checks above expect four dwordx2 stores.
  %dst1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
  %dst2 = getelementptr i64, i64 addrspace(1)* %out, i64 2
  %dst3 = getelementptr i64, i64 addrspace(1)* %out, i64 3

  store i64 123, i64 addrspace(1)* %dst1
  store i64 456, i64 addrspace(1)* %dst2
  store i64 333, i64 addrspace(1)* %dst3
  store i64 1234, i64 addrspace(1)* %out
  ret void
}
221
222; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32:
223; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
224; GCN: buffer_store_dwordx2 [[LOAD]]
define void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  ; Copies two adjacent dwords from %in to %out in matching order; the
  ; checks above expect one dwordx2 load feeding one dwordx2 store.
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  %src1 = getelementptr i32, i32 addrspace(1)* %in, i32 1

  %v0 = load i32, i32 addrspace(1)* %in
  %v1 = load i32, i32 addrspace(1)* %src1

  store i32 %v0, i32 addrspace(1)* %out
  store i32 %v1, i32 addrspace(1)* %dst1
  ret void
}
236
237; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32_nonzero_base:
238; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
239; GCN: buffer_store_dwordx2 [[LOAD]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  ; Two-dword copy where both source and destination start at element 2,
  ; i.e. byte offset 8 as required by the checks above.
  %src0 = getelementptr i32, i32 addrspace(1)* %in, i32 2
  %src1 = getelementptr i32, i32 addrspace(1)* %in, i32 3

  %dst0 = getelementptr i32, i32 addrspace(1)* %out, i32 2
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 3
  %v0 = load i32, i32 addrspace(1)* %src0
  %v1 = load i32, i32 addrspace(1)* %src1

  store i32 %v0, i32 addrspace(1)* %dst0
  store i32 %v1, i32 addrspace(1)* %dst1
  ret void
}
253
254; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_shuffle_i32:
255; GCN: buffer_load_dword v
256; GCN: buffer_load_dword v
257; GCN: buffer_store_dword v
258; GCN: buffer_store_dword v
define void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  ; Loads two adjacent dwords and stores them swapped; the checks above
  ; expect two separate dword loads and two separate dword stores.
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  %src1 = getelementptr i32, i32 addrspace(1)* %in, i32 1

  %v0 = load i32, i32 addrspace(1)* %in
  %v1 = load i32, i32 addrspace(1)* %src1

  store i32 %v1, i32 addrspace(1)* %out
  store i32 %v0, i32 addrspace(1)* %dst1
  ret void
}
270
271; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32:
272; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
273; GCN: buffer_store_dwordx4 [[LOAD]]
define void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  ; Copies four adjacent dwords from %in to %out in matching order; the
  ; checks above expect one dwordx4 load feeding one dwordx4 store.
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  %dst2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
  %dst3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
  %src1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %src2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
  %src3 = getelementptr i32, i32 addrspace(1)* %in, i32 3

  %e0 = load i32, i32 addrspace(1)* %in
  %e1 = load i32, i32 addrspace(1)* %src1
  %e2 = load i32, i32 addrspace(1)* %src2
  %e3 = load i32, i32 addrspace(1)* %src3

  store i32 %e0, i32 addrspace(1)* %out
  store i32 %e1, i32 addrspace(1)* %dst1
  store i32 %e2, i32 addrspace(1)* %dst2
  store i32 %e3, i32 addrspace(1)* %dst3
  ret void
}
293
294; GCN-LABEL: {{^}}merge_global_store_3_adjacent_loads_i32:
295; SI-DAG: buffer_load_dwordx2
296; SI-DAG: buffer_load_dword v
297; GCN: s_waitcnt
298; SI-DAG: buffer_store_dword v
299; SI-DAG: buffer_store_dwordx2 v
300; GCN: s_endpgm
define void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  ; Three-dword copy; the checks above expect a dwordx2 plus a dword access
  ; on both the load side and the store side.
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  %dst2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
  %src1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %src2 = getelementptr i32, i32 addrspace(1)* %in, i32 2

  %e0 = load i32, i32 addrspace(1)* %in
  %e1 = load i32, i32 addrspace(1)* %src1
  %e2 = load i32, i32 addrspace(1)* %src2

  store i32 %e0, i32 addrspace(1)* %out
  store i32 %e1, i32 addrspace(1)* %dst1
  store i32 %e2, i32 addrspace(1)* %dst2
  ret void
}
316
317; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_f32:
318; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
319; GCN: buffer_store_dwordx4 [[LOAD]]
define void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; Float version of the four-element in-order copy; the checks above expect
  ; one dwordx4 load feeding one dwordx4 store.
  %dst1 = getelementptr float, float addrspace(1)* %out, i32 1
  %dst2 = getelementptr float, float addrspace(1)* %out, i32 2
  %dst3 = getelementptr float, float addrspace(1)* %out, i32 3
  %src1 = getelementptr float, float addrspace(1)* %in, i32 1
  %src2 = getelementptr float, float addrspace(1)* %in, i32 2
  %src3 = getelementptr float, float addrspace(1)* %in, i32 3

  %e0 = load float, float addrspace(1)* %in
  %e1 = load float, float addrspace(1)* %src1
  %e2 = load float, float addrspace(1)* %src2
  %e3 = load float, float addrspace(1)* %src3

  store float %e0, float addrspace(1)* %out
  store float %e1, float addrspace(1)* %dst1
  store float %e2, float addrspace(1)* %dst2
  store float %e3, float addrspace(1)* %dst3
  ret void
}
339
340; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32_nonzero_base:
341; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
342; GCN: buffer_store_dwordx4 [[LOAD]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  ; Four-dword copy with different non-zero bases: source elements 11..14
  ; (byte offset 44) go to destination elements 7..10 (byte offset 28).
  %src0 = getelementptr i32, i32 addrspace(1)* %in, i32 11
  %src1 = getelementptr i32, i32 addrspace(1)* %in, i32 12
  %src2 = getelementptr i32, i32 addrspace(1)* %in, i32 13
  %src3 = getelementptr i32, i32 addrspace(1)* %in, i32 14
  %dst0 = getelementptr i32, i32 addrspace(1)* %out, i32 7
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 8
  %dst2 = getelementptr i32, i32 addrspace(1)* %out, i32 9
  %dst3 = getelementptr i32, i32 addrspace(1)* %out, i32 10

  %e0 = load i32, i32 addrspace(1)* %src0
  %e1 = load i32, i32 addrspace(1)* %src1
  %e2 = load i32, i32 addrspace(1)* %src2
  %e3 = load i32, i32 addrspace(1)* %src3

  store i32 %e0, i32 addrspace(1)* %dst0
  store i32 %e1, i32 addrspace(1)* %dst1
  store i32 %e2, i32 addrspace(1)* %dst2
  store i32 %e3, i32 addrspace(1)* %dst3
  ret void
}
364
365; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_inverse_i32:
366; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
367; GCN: s_barrier
368; GCN: buffer_store_dwordx4 [[LOAD]]
define void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  ; Four-dword copy where the stores are issued from the highest address
  ; down to the base, with a barrier between the loads and the stores.
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  %dst2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
  %dst3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
  %src1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %src2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
  %src3 = getelementptr i32, i32 addrspace(1)* %in, i32 3

  %e0 = load i32, i32 addrspace(1)* %in
  %e1 = load i32, i32 addrspace(1)* %src1
  %e2 = load i32, i32 addrspace(1)* %src2
  %e3 = load i32, i32 addrspace(1)* %src3

  ; The barrier must not prevent the loads/stores from being combined.
  tail call void @llvm.AMDGPU.barrier.local() #1

  store i32 %e3, i32 addrspace(1)* %dst3
  store i32 %e2, i32 addrspace(1)* %dst2
  store i32 %e1, i32 addrspace(1)* %dst1
  store i32 %e0, i32 addrspace(1)* %out

  ret void
}
392
393; TODO: Re-packing of loaded register required. Maybe an IR pass
394; should catch this?
395
396; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_shuffle_i32:
397; GCN: buffer_load_dword v
398; GCN: buffer_load_dword v
399; GCN: buffer_load_dword v
400; GCN: buffer_load_dword v
401; GCN: s_barrier
402; GCN: buffer_store_dword v
403; GCN: buffer_store_dword v
404; GCN: buffer_store_dword v
405; GCN: buffer_store_dword v
define void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  ; Loads four adjacent dwords and stores them in reversed element order
  ; (element 3 to slot 0, ..., element 0 to slot 3). The checks above expect
  ; four scalar loads and four scalar stores around the barrier.
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  %dst2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
  %dst3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
  %src1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %src2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
  %src3 = getelementptr i32, i32 addrspace(1)* %in, i32 3

  %e0 = load i32, i32 addrspace(1)* %in
  %e1 = load i32, i32 addrspace(1)* %src1
  %e2 = load i32, i32 addrspace(1)* %src2
  %e3 = load i32, i32 addrspace(1)* %src3

  ; Barrier separating the loads from the stores.
  tail call void @llvm.AMDGPU.barrier.local() #1

  store i32 %e3, i32 addrspace(1)* %out
  store i32 %e2, i32 addrspace(1)* %dst1
  store i32 %e1, i32 addrspace(1)* %dst2
  store i32 %e0, i32 addrspace(1)* %dst3

  ret void
}
429
430; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i8:
431; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
432; GCN: buffer_store_dword [[LOAD]]
433; GCN: s_endpgm
define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  ; Copies four adjacent bytes; the first load and the first store are
  ; marked align 4, and the checks above expect one dword load and one
  ; dword store.
  %dst1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
  %dst2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
  %dst3 = getelementptr i8, i8 addrspace(1)* %out, i8 3
  %src1 = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %src2 = getelementptr i8, i8 addrspace(1)* %in, i8 2
  %src3 = getelementptr i8, i8 addrspace(1)* %in, i8 3

  %b0 = load i8, i8 addrspace(1)* %in, align 4
  %b1 = load i8, i8 addrspace(1)* %src1
  %b2 = load i8, i8 addrspace(1)* %src2
  %b3 = load i8, i8 addrspace(1)* %src3

  store i8 %b0, i8 addrspace(1)* %out, align 4
  store i8 %b1, i8 addrspace(1)* %dst1
  store i8 %b2, i8 addrspace(1)* %dst2
  store i8 %b3, i8 addrspace(1)* %dst3
  ret void
}
453
454; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i8_natural_align:
455; GCN: buffer_load_ubyte
456; GCN: buffer_load_ubyte
457; GCN: buffer_load_ubyte
458; GCN: buffer_load_ubyte
459; GCN: buffer_store_byte
460; GCN: buffer_store_byte
461; GCN: buffer_store_byte
462; GCN: buffer_store_byte
463; GCN: s_endpgm
define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  ; Four-byte copy with no explicit alignment anywhere; the checks above
  ; expect four byte loads and four byte stores.
  %dst1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
  %dst2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
  %dst3 = getelementptr i8, i8 addrspace(1)* %out, i8 3
  %src1 = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %src2 = getelementptr i8, i8 addrspace(1)* %in, i8 2
  %src3 = getelementptr i8, i8 addrspace(1)* %in, i8 3

  %b0 = load i8, i8 addrspace(1)* %in
  %b1 = load i8, i8 addrspace(1)* %src1
  %b2 = load i8, i8 addrspace(1)* %src2
  %b3 = load i8, i8 addrspace(1)* %src3

  store i8 %b0, i8 addrspace(1)* %out
  store i8 %b1, i8 addrspace(1)* %dst1
  store i8 %b2, i8 addrspace(1)* %dst2
  store i8 %b3, i8 addrspace(1)* %dst3
  ret void
}
483
484; This works once AA is enabled on the subtarget
485; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32:
486; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
487; XGCN: buffer_store_dwordx4 [[LOAD]]
488; GCN: buffer_store_dword v
489; GCN: buffer_store_dword v
490; GCN: buffer_store_dword v
491; GCN: buffer_store_dword v
define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
  ; Loads one <4 x i32>, extracts every lane and stores the lanes to four
  ; consecutive dword slots in lane order. The active checks above expect a
  ; dwordx4 load followed by four dword stores.
  %dst1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  %dst2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
  %dst3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
  %quad = load <4 x i32>, <4 x i32> addrspace(1)* %in

  %lane0 = extractelement <4 x i32> %quad, i32 0
  %lane1 = extractelement <4 x i32> %quad, i32 1
  %lane2 = extractelement <4 x i32> %quad, i32 2
  %lane3 = extractelement <4 x i32> %quad, i32 3

  store i32 %lane0, i32 addrspace(1)* %out
  store i32 %lane1, i32 addrspace(1)* %dst1
  store i32 %lane2, i32 addrspace(1)* %dst2
  store i32 %lane3, i32 addrspace(1)* %dst3
  ret void
}
509
510; GCN-LABEL: {{^}}merge_local_store_2_constants_i8:
511; GCN: ds_write_b8
512; GCN: ds_write_b8
513; GCN: s_endpgm
define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 {
  ; Address space 3 counterpart of the global i8 pair: two adjacent constant
  ; byte stores, base store aligned to 2. The checks above expect two
  ; ds_write_b8 instructions.
  %out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1

  store i8 123, i8 addrspace(3)* %out.gep.1
  ; 456 does not fit in i8; spell out the value it wraps to (0xC8 == -56)
  ; instead of relying on the parser to truncate it.
  store i8 -56, i8 addrspace(3)* %out, align 2
  ret void
}
521
522; GCN-LABEL: {{^}}merge_local_store_2_constants_i32:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}}
define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 {
  ; Two adjacent constant dword stores in address space 3; the checks above
  ; expect one ds_write2_b32 with 456 (0x1c8) first and 123 (0x7b) second.
  %dst1 = getelementptr i32, i32 addrspace(3)* %out, i32 1

  store i32 123, i32 addrspace(3)* %dst1
  store i32 456, i32 addrspace(3)* %out
  ret void
}
533
534; GCN-LABEL: {{^}}merge_local_store_4_constants_i32:
535; GCN: ds_write_b32
536; GCN: ds_write_b32
537; GCN: ds_write_b32
538; GCN: ds_write_b32
define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
  ; Four consecutive constant dword stores in address space 3, base element
  ; last; the checks above expect four ds_write_b32 instructions.
  %dst1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
  %dst2 = getelementptr i32, i32 addrspace(3)* %out, i32 2
  %dst3 = getelementptr i32, i32 addrspace(3)* %out, i32 3

  store i32 123, i32 addrspace(3)* %dst1
  store i32 456, i32 addrspace(3)* %dst2
  store i32 333, i32 addrspace(3)* %dst3
  store i32 1234, i32 addrspace(3)* %out
  ret void
}
550
; GCN-LABEL: {{^}}merge_global_store_5_constants_i32:
552; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 9{{$}}
553; GCN-DAG: v_mov_b32_e32 v[[HI4:[0-9]+]], -12{{$}}
554; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI4]]{{\]}}
555; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 11{{$}}
556; GCN: buffer_store_dword v[[HI]]
define void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) {
  ; Five consecutive constant dwords stored in ascending order; the checks
  ; above expect a dwordx4 store for slots 0..3 and a dword store for slot 4.
  store i32 9, i32 addrspace(1)* %out, align 4
  %slot1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
  store i32 12, i32 addrspace(1)* %slot1, align 4
  %slot2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
  store i32 16, i32 addrspace(1)* %slot2, align 4
  %slot3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
  store i32 -12, i32 addrspace(1)* %slot3, align 4
  %slot4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
  store i32 11, i32 addrspace(1)* %slot4, align 4
  ret void
}
569
570; GCN-LABEL: {{^}}merge_global_store_6_constants_i32:
571; GCN: buffer_store_dwordx4
572; GCN: buffer_store_dwordx2
define void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) {
  ; Six consecutive constant dwords stored in ascending order; the checks
  ; above expect a dwordx4 store followed by a dwordx2 store.
  store i32 13, i32 addrspace(1)* %out, align 4
  %slot1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
  store i32 15, i32 addrspace(1)* %slot1, align 4
  %slot2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
  store i32 62, i32 addrspace(1)* %slot2, align 4
  %slot3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
  store i32 63, i32 addrspace(1)* %slot3, align 4
  %slot4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
  store i32 11, i32 addrspace(1)* %slot4, align 4
  %slot5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
  store i32 123, i32 addrspace(1)* %slot5, align 4
  ret void
}
587
588; GCN-LABEL: {{^}}merge_global_store_7_constants_i32:
589; GCN: buffer_store_dwordx4
590; GCN: buffer_store_dwordx2
591; GCN: buffer_store_dword v
define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) {
  ; Seven consecutive constant dwords stored in ascending order; the checks
  ; above expect a dwordx4, a dwordx2 and a dword store.
  store i32 34, i32 addrspace(1)* %out, align 4
  %slot1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
  store i32 999, i32 addrspace(1)* %slot1, align 4
  %slot2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
  store i32 65, i32 addrspace(1)* %slot2, align 4
  %slot3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
  store i32 33, i32 addrspace(1)* %slot3, align 4
  %slot4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
  store i32 98, i32 addrspace(1)* %slot4, align 4
  %slot5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
  store i32 91, i32 addrspace(1)* %slot5, align 4
  %slot6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6
  store i32 212, i32 addrspace(1)* %slot6, align 4
  ret void
}
608
609; GCN-LABEL: {{^}}merge_global_store_8_constants_i32:
610; XGCN: buffer_store_dwordx4
611; XGCN: buffer_store_dwordx4
612
613; GCN: buffer_store_dword v
614; GCN: buffer_store_dword v
615; GCN: buffer_store_dword v
616; GCN: buffer_store_dword v
617; GCN: buffer_store_dword v
618; GCN: buffer_store_dword v
619; GCN: buffer_store_dword v
620; GCN: buffer_store_dword v
define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
  ; Eight consecutive constant dwords stored in ascending order. The active
  ; checks above expect eight separate dword stores; the two-dwordx4
  ; expectation is disabled.
  store i32 34, i32 addrspace(1)* %out, align 4
  %slot1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
  store i32 999, i32 addrspace(1)* %slot1, align 4
  %slot2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
  store i32 65, i32 addrspace(1)* %slot2, align 4
  %slot3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
  store i32 33, i32 addrspace(1)* %slot3, align 4
  %slot4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
  store i32 98, i32 addrspace(1)* %slot4, align 4
  %slot5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
  store i32 91, i32 addrspace(1)* %slot5, align 4
  %slot6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6
  store i32 212, i32 addrspace(1)* %slot6, align 4
  %slot7 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 7
  store i32 999, i32 addrspace(1)* %slot7, align 4
  ret void
}
639
; Barrier intrinsic called between the loads and the stores in the
; 4_adjacent_loads_inverse_i32 and 4_adjacent_loads_shuffle_i32 tests.
declare void @llvm.AMDGPU.barrier.local() #1

; #0 is attached to the test functions above that carry it.
attributes #0 = { nounwind }
; #1 is attached to the barrier declaration and its call sites.
attributes #1 = { noduplicate nounwind }