blob: 4a77e003ed56fd5f184806b7c8d7499fc594b8e3 [file] [log] [blame]
; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
Matt Arsenault2ba54c32013-10-30 23:30:05 +00003
; Copies one i16 between two addrspace(3) pointers with both accesses marked
; align 1. The checks expect two byte reads followed by two byte writes.
; SI-LABEL: {{^}}unaligned_load_store_i16_local:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(3)* %r) nounwind {
  %halfword = load i16, i16 addrspace(3)* %p, align 1
  store i16 %halfword, i16 addrspace(3)* %r, align 1
  ret void
}
15
; Copies one i16 between two addrspace(1) pointers with both accesses marked
; align 1. The checks expect two byte loads followed by two byte stores.
; SI-LABEL: {{^}}unaligned_load_store_i16_global:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: s_endpgm
define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) nounwind {
  %halfword = load i16, i16 addrspace(1)* %p, align 1
  store i16 %halfword, i16 addrspace(1)* %r, align 1
  ret void
}
27
; Copies one i32 between two addrspace(3) pointers with both accesses marked
; align 1. The checks expect four byte reads followed by four byte writes.
; SI-LABEL: {{^}}unaligned_load_store_i32_local:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
  %word = load i32, i32 addrspace(3)* %p, align 1
  store i32 %word, i32 addrspace(3)* %r, align 1
  ret void
}
43
; Copies one i32 between two addrspace(1) pointers with both accesses marked
; align 1. The checks expect four byte loads followed by four byte stores.
; SI-LABEL: {{^}}unaligned_load_store_i32_global:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
  %word = load i32, i32 addrspace(1)* %p, align 1
  store i32 %word, i32 addrspace(1)* %r, align 1
  ret void
}
58
; Copies one i32 between two addrspace(1) pointers with both accesses marked
; align 2. The checks expect two 16-bit loads followed by two 16-bit stores.
; SI-LABEL: {{^}}align2_load_store_i32_global:
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_store_short
; SI: buffer_store_short
define void @align2_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
  %word = load i32, i32 addrspace(1)* %p, align 2
  store i32 %word, i32 addrspace(1)* %r, align 2
  ret void
}
69
; Copies one i32 between two addrspace(3) pointers with both accesses marked
; align 2. The checks expect two 16-bit reads followed by two 16-bit writes.
; SI-LABEL: {{^}}align2_load_store_i32_local:
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_write_b16
; SI: ds_write_b16
define void @align2_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
  %word = load i32, i32 addrspace(3)* %p, align 2
  store i32 %word, i32 addrspace(3)* %r, align 2
  ret void
}
80
; Copies one i64 between two addrspace(3) pointers with both accesses marked
; align 1. The active checks expect eight byte reads and eight byte writes.
; The XSI-NOT lines use a prefix no RUN line enables, so they are inert; they
; record the or/shift instructions the FIXME below would like to see removed.
; FIXME: Unnecessary packing and unpacking of bytes.
; SI-LABEL: {{^}}unaligned_load_store_i64_local:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl
; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl

; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl

; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl

; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl

; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl

; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl

; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
  %wide = load i64, i64 addrspace(3)* %p, align 1
  store i64 %wide, i64 addrspace(3)* %r, align 1
  ret void
}
128
; Copies one <2 x i32> between two addrspace(3) pointers with both accesses
; marked align 1. The active checks expect eight byte reads and eight byte
; writes. The XSI-NOT lines use a prefix no RUN line enables, so they are inert.
; SI-LABEL: {{^}}unaligned_load_store_v2i32_local:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl
; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl

; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl

; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl

; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl

; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl

; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl

; SI: ds_write_b8
; XSI-NOT: v_or_b32
; XSI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_v2i32_local(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) {
  %pair = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
  store <2 x i32> %pair, <2 x i32> addrspace(3)* %r, align 1
  ret void
}
175
; Copies one i64 between two addrspace(1) pointers with both accesses marked
; align 1. The active checks expect eight byte loads followed by eight byte
; stores. The XSI-NOT lines use a prefix no RUN line enables, so they are inert.
; SI-LABEL: {{^}}unaligned_load_store_i64_global:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte

; XSI-NOT: v_or_
; XSI-NOT: v_lshl

; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
  %wide = load i64, i64 addrspace(1)* %p, align 1
  store i64 %wide, i64 addrspace(1)* %r, align 1
  ret void
}
202
; Copies one <4 x i32> between two addrspace(3) pointers with both accesses
; marked align 1. The checks expect sixteen byte reads followed by sixteen byte
; writes; the blank lines group them four at a time, one group per element.
; SI-LABEL: {{^}}unaligned_load_store_v4i32_local:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
  %vec = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
  store <4 x i32> %vec, <4 x i32> addrspace(3)* %r, align 1
  ret void
}
Matt Arsenault6f2a5262014-07-27 17:46:40 +0000249
; Copies one <4 x i32> between two addrspace(1) pointers with both accesses
; marked align 1. Every directive below carries a FIXME- prefix that no RUN
; line enables, so this function is compiled but its output is not checked.
; FIXME: We mark v4i32 as custom, so misaligned loads are never expanded.
; FIXME-SI-LABEL: {{^}}unaligned_load_store_v4i32_global
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
  %vec = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
  store <4 x i32> %vec, <4 x i32> addrspace(1)* %r, align 1
  ret void
}
273
; Loads an i64 from addrspace(3) with align 4 and stores it to addrspace(1)
; with align 8. The check expects the load to appear as one ds_read2_b32.
; SI-LABEL: {{^}}load_lds_i64_align_4:
; SI: ds_read2_b32
; SI: s_endpgm
define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %loaded = load i64, i64 addrspace(3)* %in, align 4
  store i64 %loaded, i64 addrspace(1)* %out, align 8
  ret void
}
282
; Loads an align-4 i64 from i64 element 4 of an addrspace(3) pointer, i.e.
; byte offset 32, which is 32-bit element offsets 8 and 9. The check expects
; those to be encoded directly as offset0:8 offset1:9 on one ds_read2_b32.
; The label pattern ends in ':' like the other labels in this file, and the
; address register pattern accepts multi-digit register numbers.
; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset:
; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:8 offset1:9
; SI: s_endpgm
define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
  %val = load i64, i64 addrspace(3)* %ptr, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
292
; Loads an align-4 i64 that starts at i32 element 255 of an addrspace(3)
; pointer, so its two halves sit at 32-bit element offsets 255 and 256.
; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
; This tests the case where the lo offset fits in 8 bits but the hi offset needs 9 bits.
; The address register pattern accepts multi-digit register numbers.
; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset1:1
; SI: s_endpgm
define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
  %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  %val = load i64, i64 addrspace(3)* %ptri64, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
305
; Loads an i64 from addrspace(3) with align 1 and stores it to addrspace(1)
; with align 8. The checks expect eight byte reads and then a single
; buffer_store_dwordx2 for the aligned store.
; SI-LABEL: {{^}}load_lds_i64_align_1:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: buffer_store_dwordx2
; SI: s_endpgm

define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %loaded = load i64, i64 addrspace(3)* %in, align 1
  store i64 %loaded, i64 addrspace(1)* %out, align 8
  ret void
}
Tom Stellardf3fc5552014-08-22 18:49:35 +0000323
; Stores the i64 argument %val to an addrspace(3) pointer with align 4.
; The check expects the store to appear as one ds_write2_b32.
; SI-LABEL: {{^}}store_lds_i64_align_4:
; SI: ds_write2_b32
; SI: s_endpgm
define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
  store i64 %val, i64 addrspace(3)* %out, align 4
  ret void
}
331
; Stores an align-4 i64 zero to i64 element 4 of an addrspace(3) pointer,
; i.e. byte offset 32, which is 32-bit element offsets 8 and 9. The check
; expects those to be encoded directly as offset0:8 offset1:9.
; The label pattern ends in ':' like the other labels in this file.
; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset:
; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
; SI: s_endpgm
define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
  %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
  store i64 0, i64 addrspace(3)* %ptr, align 4
  ret void
}
340
; Stores an align-4 i64 zero starting at i32 element 255 of an addrspace(3)
; pointer, so its two halves sit at 32-bit element offsets 255 and 256.
; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
; This tests the case where the lo offset fits in 8 bits but the hi offset needs 9 bits.
; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: s_endpgm
define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
  %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
  %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  ; Store through the offset pointer. Storing through %out would leave the
  ; three instructions above dead and never exercise the split-offset case.
  ; NOTE(review): the check is expected to still match, as it does for the
  ; load variant of this test - confirm by running it.
  store i64 0, i64 addrspace(3)* %ptri64, align 4
  ret void
}