; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; An align-1 i16 load/store through an LDS (addrspace 3) pointer must be
; split into byte accesses.
; FUNC-LABEL: {{^}}local_unaligned_load_store_i16:
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: s_endpgm
define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(3)* %r) #0 {
  %v = load i16, i16 addrspace(3)* %p, align 1
  store i16 %v, i16 addrspace(3)* %r, align 1
  ret void
}
17
; An align-1 i16 load/store through a global (addrspace 1) pointer is split
; into byte accesses; HSA targets use flat instructions instead of buffers.
; FUNC-LABEL: {{^}}global_unaligned_load_store_i16:
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_store_byte
; GCN-NOHSA: buffer_store_byte

; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_store_byte
; GCN-HSA: flat_store_byte
define void @global_unaligned_load_store_i16(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
  %v = load i16, i16 addrspace(1)* %p, align 1
  store i16 %v, i16 addrspace(1)* %r, align 1
  ret void
}
33
; An align-1 i32 LDS load/store is split into four byte accesses, without
; redundant shift/or reassembly between them.
; NOTE(review) - the SI check prefix below is not enabled by any RUN line at
; the top of this file, so these directives are currently inert; they likely
; should use the GCN prefix instead - confirm before changing.
; FUNC-LABEL: {{^}}local_unaligned_load_store_i32:

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI-NOT: v_or
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
  %v = load i32, i32 addrspace(3)* %p, align 1
  store i32 %v, i32 addrspace(3)* %r, align 1
  ret void
}
52
; An align-1 i32 global load/store is split into four byte accesses.
; FUNC-LABEL: {{^}}global_unaligned_load_store_i32:
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_store_byte
; GCN-NOHSA: buffer_store_byte
; GCN-NOHSA: buffer_store_byte
; GCN-NOHSA: buffer_store_byte

; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_store_byte
; GCN-HSA: flat_store_byte
; GCN-HSA: flat_store_byte
; GCN-HSA: flat_store_byte
define void @global_unaligned_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(1)* %p, align 1
  store i32 %v, i32 addrspace(1)* %r, align 1
  ret void
}
76
; An align-2 i32 global load/store is split into two 16-bit accesses.
; FUNC-LABEL: {{^}}global_align2_load_store_i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-NOHSA: buffer_load_ushort
; GCN-NOHSA: buffer_store_short
; GCN-NOHSA: buffer_store_short

; GCN-HSA: flat_load_ushort
; GCN-HSA: flat_load_ushort
; GCN-HSA: flat_store_short
; GCN-HSA: flat_store_short
define void @global_align2_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(1)* %p, align 2
  store i32 %v, i32 addrspace(1)* %r, align 2
  ret void
}
92
; An align-2 i32 LDS load/store is split into two 16-bit accesses.
; FUNC-LABEL: {{^}}local_align2_load_store_i32:
; GCN: ds_read_u16
; GCN: ds_read_u16
; GCN: ds_write_b16
; GCN: ds_write_b16
define void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
  %v = load i32, i32 addrspace(3)* %p, align 2
  store i32 %v, i32 addrspace(3)* %r, align 2
  ret void
}
103
; An align-1 i64 LDS load/store is split into eight byte accesses, with no
; shift/or reassembly interleaved between the byte stores.
; FUNC-LABEL: {{^}}local_unaligned_load_store_i64:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
  %v = load i64, i64 addrspace(3)* %p, align 1
  store i64 %v, i64 addrspace(3)* %r, align 1
  ret void
}
150
; An align-1 <2 x i32> LDS load/store is split into eight byte accesses,
; mirroring the i64 case above.
; SI-LABEL: {{^}}local_unaligned_load_store_v2i32:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) {
  %v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
  store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
  ret void
}
197
; An align-2 i64 global load/store is split into four 16-bit accesses.
; SI-LABEL: {{^}}global_align2_load_store_i64:
; SI: buffer_load_ushort
; SI: buffer_load_ushort

; SI-NOT: v_or_
; SI-NOT: v_lshl

; SI: buffer_load_ushort

; SI-NOT: v_or_
; SI-NOT: v_lshl

; SI: buffer_load_ushort

; SI-NOT: v_or_
; SI-NOT: v_lshl

; SI: buffer_store_short
; SI: buffer_store_short
; SI: buffer_store_short
; SI: buffer_store_short
define void @global_align2_load_store_i64(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
  %v = load i64, i64 addrspace(1)* %p, align 2
  store i64 %v, i64 addrspace(1)* %r, align 2
  ret void
}
224
; An align-1 i64 global load/store is split into eight byte accesses.
; SI-LABEL: {{^}}unaligned_load_store_i64_global:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte

; SI-NOT: v_or_
; SI-NOT: v_lshl

; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
  %v = load i64, i64 addrspace(1)* %p, align 1
  store i64 %v, i64 addrspace(1)* %r, align 1
  ret void
}
251
; An align-1 <4 x i32> LDS load/store is split into sixteen byte accesses.
; FUNC-LABEL: {{^}}local_unaligned_load_store_v4i32:
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8

; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8

; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8

; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: s_endpgm
define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
  store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
  ret void
}
Matt Arsenault6f2a5262014-07-27 17:46:40 +0000298
; An align-1 <4 x i32> global load/store is split into sixteen byte accesses.
; SI-LABEL: {{^}}global_unaligned_load_store_v4i32
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte

; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
  %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
  store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
  ret void
}
338
; An align-4 i64 LDS load is emitted as a single two-dword ds_read2.
; FUNC-LABEL: {{^}}local_load_i64_align_4:
; GCN: ds_read2_b32
define void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %val = load i64, i64 addrspace(3)* %in, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
346
; The 32-byte pointer offset folds into the ds_read2 dword offsets (8 and 9).
; FUNC-LABEL: {{^}}local_load_i64_align_4_with_offset
; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
define void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
  %val = load i64, i64 addrspace(3)* %ptr, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
355
; FUNC-LABEL: {{^}}local_load_i64_align_4_with_split_offset:
; Tests the case where the lo offset fits in 8 bits but the hi offset would
; need 9 bits, so the base register is adjusted instead.
; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
; GCN: s_endpgm
define void @local_load_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
  %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  %val = load i64, i64 addrspace(3)* %ptri64, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
368
; An align-1 i64 LDS load is split into eight byte reads; the aligned global
; store of the result stays a single two-dword store.
; FUNC-LABEL: {{^}}local_load_i64_align_1:
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: store_dwordx2
define void @local_load_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %val = load i64, i64 addrspace(3)* %in, align 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
Tom Stellardf3fc5552014-08-22 18:49:35 +0000384
; An align-4 i64 LDS store is emitted as a single two-dword ds_write2.
; FUNC-LABEL: {{^}}local_store_i64_align_4:
; GCN: ds_write2_b32
define void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
  store i64 %val, i64 addrspace(3)* %out, align 4
  ret void
}
391
; The 32-byte pointer offset folds into the ds_write2 dword offsets (8 and 9).
; FUNC-LABEL: {{^}}local_store_i64_align_4_with_offset
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
; GCN: s_endpgm
define void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
  %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
  store i64 0, i64 addrspace(3)* %ptr, align 4
  ret void
}
400
; FUNC-LABEL: {{^}}local_store_i64_align_4_with_split_offset:
; Tests the case where the lo offset fits in 8 bits but the hi offset would
; need 9 bits, so the base register is adjusted instead.
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; GCN: s_endpgm
define void @local_store_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
  %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
  %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  ; Store through the offset pointer, matching the load-side twin above; the
  ; original stored to %out, leaving %ptri64 dead and never exercising the
  ; split-offset path this test describes.
  store i64 0, i64 addrspace(3)* %ptri64, align 4
  ret void
}
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000412
; A naturally-aligned i16 load from constant address space stays a single
; 16-bit load; r600 uses a vertex fetch.
; FUNC-LABEL: {{^}}constant_load_unaligned_i16:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
define void @constant_load_unaligned_i16(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %tmp0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
  %tmp1 = load i16, i16 addrspace(2)* %tmp0
  %tmp2 = zext i16 %tmp1 to i32
  store i32 %tmp2, i32 addrspace(1)* %out
  ret void
}
426
; An align-1 i32 load from constant address space is split into byte loads.
; FUNC-LABEL: {{^}}constant_load_unaligned_i32:
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_load_ubyte

; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_load_ubyte
define void @constant_load_unaligned_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %tmp0 = load i32, i32 addrspace(2)* %in, align 1
  store i32 %tmp0, i32 addrspace(1)* %out
  ret void
}
443
; An align-1 f32 load from constant address space is split into byte loads.
; FUNC-LABEL: {{^}}constant_load_unaligned_f32:
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_load_ubyte

; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_load_ubyte
define void @constant_load_unaligned_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
  %tmp1 = load float, float addrspace(2)* %in, align 1
  store float %tmp1, float addrspace(1)* %out
  ret void
}
459
attributes #0 = { nounwind }