blob: 453c981db1a582cc6ca3de9f4470bdbc38550fc1 [file] [log] [blame]
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s

; Unaligned i16 through LDS must be split into byte accesses.
; SI-LABEL: {{^}}local_unaligned_load_store_i16:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(3)* %r) #0 {
  %val = load i16, i16 addrspace(3)* %p, align 1
  store i16 %val, i16 addrspace(3)* %r, align 1
  ret void
}
16
; Unaligned i16 through global memory: byte ops unless the target has
; unaligned buffer access, in which case a single short load/store is used.
; SI-LABEL: {{^}}global_unaligned_load_store_i16:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_ushort
; UNALIGNED: buffer_store_short
; SI: s_endpgm
define void @global_unaligned_load_store_i16(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
  %val = load i16, i16 addrspace(1)* %p, align 1
  store i16 %val, i16 addrspace(1)* %r, align 1
  ret void
}
31
; NOTE: was FUNC-LABEL, but no RUN line defines a FUNC prefix, so the label
; check was silently skipped; use SI, which every RUN line enables.
; SI-LABEL: {{^}}local_unaligned_load_store_i32:

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI-NOT: v_or
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
  %v = load i32, i32 addrspace(3)* %p, align 1
  store i32 %v, i32 addrspace(3)* %r, align 1
  ret void
}
50
; Unaligned i32 through global memory: four byte ops, or one dword with
; unaligned buffer access enabled.
; SI-LABEL: {{^}}global_unaligned_load_store_i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_dword
; UNALIGNED: buffer_store_dword
define void @global_unaligned_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
  %val = load i32, i32 addrspace(1)* %p, align 1
  store i32 %val, i32 addrspace(1)* %r, align 1
  ret void
}
68
; Align-2 i32 through global memory: short ops, or one dword with
; unaligned buffer access enabled.
; SI-LABEL: {{^}}global_align2_load_store_i32:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short

; UNALIGNED: buffer_load_dword
; UNALIGNED: buffer_store_dword
define void @global_align2_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
  %val = load i32, i32 addrspace(1)* %p, align 2
  store i32 %val, i32 addrspace(1)* %r, align 2
  ret void
}
82
; NOTE: was FUNC-LABEL/GCN, but neither prefix is defined by any RUN line,
; so all of these checks were silently skipped; use SI instead.
; SI-LABEL: {{^}}local_align2_load_store_i32:
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_write_b16
; SI: ds_write_b16
define void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
  %v = load i32, i32 addrspace(3)* %p, align 2
  store i32 %v, i32 addrspace(3)* %r, align 2
  ret void
}
93
; NOTE: was FUNC-LABEL, but no RUN line defines a FUNC prefix, so the label
; check was silently skipped; use SI like the rest of this block's checks.
; SI-LABEL: {{^}}local_unaligned_load_store_i64:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) #0 {
  %v = load i64, i64 addrspace(3)* %p, align 1
  store i64 %v, i64 addrspace(3)* %r, align 1
  ret void
}
140
; Unaligned <2 x i32> through LDS: fully decomposed into 8 byte reads and
; 8 byte writes, with no re-assembly (or/shift) expected in between.
; SI-LABEL: {{^}}local_unaligned_load_store_v2i32:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) #0 {
  %val = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
  store <2 x i32> %val, <2 x i32> addrspace(3)* %r, align 1
  ret void
}
187
; Align-2 i64 through global memory: four short loads/stores without
; re-assembly, or one dwordx2 pair with unaligned buffer access enabled.
; SI-LABEL: {{^}}global_align2_load_store_i64:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_load_ushort

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_load_ushort

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short

; UNALIGNED: buffer_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define void @global_align2_load_store_i64(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
  %val = load i64, i64 addrspace(1)* %p, align 2
  store i64 %val, i64 addrspace(1)* %r, align 2
  ret void
}
217
; Align-1 i64 through global memory: eight byte loads/stores without
; re-assembly, or one dwordx2 pair with unaligned buffer access enabled.
; SI-LABEL: {{^}}unaligned_load_store_i64_global:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
  %val = load i64, i64 addrspace(1)* %p, align 1
  store i64 %val, i64 addrspace(1)* %r, align 1
  ret void
}
247
; NOTE: was FUNC-LABEL/GCN, but neither prefix is defined by any RUN line,
; so all of these checks were silently skipped; use SI instead.
; SI-LABEL: {{^}}local_unaligned_load_store_v4i32:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
  store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
  ret void
}
Matt Arsenault6f2a5262014-07-27 17:46:40 +0000294
; NOTE: added the missing trailing colon to the label check so it cannot
; accidentally match a longer symbol name that shares this prefix.
; SI-LABEL: {{^}}global_unaligned_load_store_v4i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_dwordx4
; UNALIGNED: buffer_store_dwordx4
define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
  store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
  ret void
}
337
; NOTE: was FUNC-LABEL/GCN, but neither prefix is defined by any RUN line,
; so these checks were silently skipped; use SI instead.
; SI-LABEL: {{^}}local_load_i64_align_4:
; SI: ds_read2_b32
define void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %val = load i64, i64 addrspace(3)* %in, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
345
; NOTE: was FUNC-LABEL/GCN (prefixes never defined by any RUN line; checks
; were silently skipped) and the label was missing its trailing colon.
; SI-LABEL: {{^}}local_load_i64_align_4_with_offset:
; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
define void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
  %val = load i64, i64 addrspace(3)* %ptr, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
354
; NOTE: was FUNC-LABEL/GCN, but neither prefix is defined by any RUN line,
; so these checks were silently skipped; use SI instead.
; SI-LABEL: {{^}}local_load_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
; SI: s_endpgm
define void @local_load_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
  %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  %val = load i64, i64 addrspace(3)* %ptri64, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
367
; NOTE: was FUNC-LABEL/GCN, but neither prefix is defined by any RUN line,
; so these checks were silently skipped; use SI instead.
; SI-LABEL: {{^}}local_load_i64_align_1:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: store_dwordx2
define void @local_load_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %val = load i64, i64 addrspace(3)* %in, align 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
Tom Stellardf3fc5552014-08-22 18:49:35 +0000383
; NOTE: was FUNC-LABEL/GCN, but neither prefix is defined by any RUN line,
; so these checks were silently skipped; use SI instead.
; SI-LABEL: {{^}}local_store_i64_align_4:
; SI: ds_write2_b32
define void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
  store i64 %val, i64 addrspace(3)* %out, align 4
  ret void
}
390
; NOTE: was FUNC-LABEL/GCN (prefixes never defined by any RUN line; checks
; were silently skipped) and the label was missing its trailing colon.
; SI-LABEL: {{^}}local_store_i64_align_4_with_offset:
; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
; SI: s_endpgm
define void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
  %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
  store i64 0, i64 addrspace(3)* %ptr, align 4
  ret void
}
399
; NOTE: was FUNC-LABEL/GCN (prefixes never defined by any RUN line; checks
; were silently skipped). The store also targeted %out, leaving %ptri64 dead
; and defeating the split-offset scenario this test (and its load twin above)
; is meant to cover; store through %ptri64 as the comment describes.
; SI-LABEL: {{^}}local_store_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: s_endpgm
define void @local_store_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
  %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
  %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  store i64 0, i64 addrspace(3)* %ptri64, align 4
  ret void
}
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000411
; Unaligned i32 from constant address space: byte loads, or a scalar dword
; load when unaligned access is available.
; SI-LABEL: {{^}}constant_unaligned_load_i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; UNALIGNED: s_load_dword

; SI: buffer_store_dword
define void @constant_unaligned_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
  %val = load i32, i32 addrspace(2)* %p, align 1
  store i32 %val, i32 addrspace(1)* %r, align 4
  ret void
}
426
; Align-2 i32 from constant address space: short loads, or a scalar dword
; load when unaligned access is available.
; SI-LABEL: {{^}}constant_align2_load_i32:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort

; UNALIGNED: s_load_dword
; UNALIGNED: buffer_store_dword
define void @constant_align2_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
  %val = load i32, i32 addrspace(2)* %p, align 2
  store i32 %val, i32 addrspace(1)* %r, align 4
  ret void
}
438
; Align-2 i64 from constant address space: four short loads, or a scalar
; dwordx2 load when unaligned access is available.
; SI-LABEL: {{^}}constant_align2_load_i64:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort

; UNALIGNED: s_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define void @constant_align2_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
  %val = load i64, i64 addrspace(2)* %p, align 2
  store i64 %val, i64 addrspace(1)* %r, align 4
  ret void
}
452
; Align-4 i64 from constant address space stays a scalar dwordx2 load.
; SI-LABEL: {{^}}constant_align4_load_i64:
; SI: s_load_dwordx2
; SI: buffer_store_dwordx2
define void @constant_align4_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
  %val = load i64, i64 addrspace(2)* %p, align 4
  store i64 %val, i64 addrspace(1)* %r, align 4
  ret void
}
461
; Align-4 <4 x i32> from constant address space stays a scalar dwordx4 load.
; SI-LABEL: {{^}}constant_align4_load_v4i32:
; SI: s_load_dwordx4
; SI: buffer_store_dwordx4
define void @constant_align4_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
  %val = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 4
  store <4 x i32> %val, <4 x i32> addrspace(1)* %r, align 4
  ret void
}
470
; Unaligned <2 x i32> from constant address space: eight byte loads, or one
; dwordx2 load when unaligned access is available.
; SI-LABEL: {{^}}constant_unaligned_load_v2i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; UNALIGNED: buffer_load_dwordx2

; SI: buffer_store_dwordx2
define void @constant_unaligned_load_v2i32(<2 x i32> addrspace(2)* %p, <2 x i32> addrspace(1)* %r) #0 {
  %val = load <2 x i32>, <2 x i32> addrspace(2)* %p, align 1
  store <2 x i32> %val, <2 x i32> addrspace(1)* %r, align 4
  ret void
}
490
; Unaligned <4 x i32> from constant address space: sixteen byte loads, or
; one dwordx4 load when unaligned access is available.
; SI-LABEL: {{^}}constant_unaligned_load_v4i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; UNALIGNED: buffer_load_dwordx4

; SI: buffer_store_dwordx4
define void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
  %val = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 1
  store <4 x i32> %val, <4 x i32> addrspace(1)* %r, align 4
  ret void
}
520
; i8 from constant address space is still a single byte load even at align 4.
; SI-LABEL: {{^}}constant_align4_load_i8:
; SI: buffer_load_ubyte
; SI: buffer_store_byte
define void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
  %val = load i8, i8 addrspace(2)* %p, align 4
  store i8 %val, i8 addrspace(1)* %r, align 4
  ret void
}
529
; i8 from constant address space is still a single byte load at align 2.
; SI-LABEL: {{^}}constant_align2_load_i8:
; SI: buffer_load_ubyte
; SI: buffer_store_byte
define void @constant_align2_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
  %val = load i8, i8 addrspace(2)* %p, align 2
  store i8 %val, i8 addrspace(1)* %r, align 2
  ret void
}
538
; Two adjacent align-4 constant loads are merged into one s_load_dwordx2,
; and the adjacent stores into one buffer_store_dwordx2.
; SI-LABEL: {{^}}constant_align4_merge_load_2_i32:
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[LO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @constant_align4_merge_load_2_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
  %gep0 = getelementptr i32, i32 addrspace(2)* %p, i64 1
  %lo = load i32, i32 addrspace(2)* %p, align 4
  %hi = load i32, i32 addrspace(2)* %gep0, align 4

  %gep1 = getelementptr i32, i32 addrspace(1)* %r, i64 1
  store i32 %lo, i32 addrspace(1)* %r, align 4
  store i32 %hi, i32 addrspace(1)* %gep1, align 4
  ret void
}
554
; Align-1 <16 x i8> LDS load: sixteen byte reads, and no scratch spilling.
; SI-LABEL: {{^}}local_load_align1_v16i8:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ScratchSize: 0{{$}}
define void @local_load_align1_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in, align 1
  store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
  ret void
}
579
; Align-1 <16 x i8> LDS store: sixteen byte writes, and no scratch spilling.
; SI-LABEL: {{^}}local_store_align1_v16i8:
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ScratchSize: 0{{$}}
define void @local_store_align1_v16i8(<16 x i8> addrspace(3)* %out) #0 {
  store <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* %out, align 1
  ret void
}
603
attributes #0 = { nounwind }