; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Checks how small (32-byte) llvm.memcpy calls are lowered for the amdgcn
; target at several alignments, both for addrspace(3) (LDS-to-LDS) and
; addrspace(1) (global-to-global) copies. The SI and tonga invocations above
; share the same check prefixes, so both must produce the instructions below.

; memcpy between addrspace(3) pointers with an i32 length.
declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
; memcpy between addrspace(1) pointers with an i64 length.
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind


; 32-byte LDS-to-LDS memcpy with an alignment argument of 1. The checks expect
; 32 ds_read_u8 and 32 ds_write_b8 instructions; the -DAG form lets them
; appear in any relative order before the final s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Operands: dest, src, length = 32, alignment = 1, trailing i1 = false.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte LDS-to-LDS memcpy with an alignment argument of 2. The checks expect
; 16 ds_read_u16 and 16 ds_write_b16 instructions, in any relative order,
; before the final s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16

; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16

; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16

; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Operands: dest, src, length = 32, alignment = 2, trailing i1 = false.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte LDS-to-LDS memcpy with an alignment argument of 4. The checks are
; ordered: four ds_read2_b32, then four ds_write2_b32, then s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
; SI: ds_read2_b32
; SI: ds_read2_b32
; SI: ds_read2_b32
; SI: ds_read2_b32

; SI: ds_write2_b32
; SI: ds_write2_b32
; SI: ds_write2_b32
; SI: ds_write2_b32

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Operands: dest, src, length = 32, alignment = 4, trailing i1 = false.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte LDS-to-LDS memcpy with an alignment argument of 8. The checks are
; ordered: four ds_read_b64, then four ds_write_b64, then s_endpgm.
; FIXME: Use 64-bit ops
; NOTE(review): the checks below already expect ds_read_b64/ds_write_b64, so
; the FIXME above looks stale -- confirm before removing it.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:

; SI: ds_read_b64
; SI: ds_read_b64
; SI: ds_read_b64
; SI: ds_read_b64

; SI: ds_write_b64
; SI: ds_write_b64
; SI: ds_write_b64
; SI: ds_write_b64

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Operands: dest, src, length = 32, alignment = 8, trailing i1 = false.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte global-to-global memcpy with an alignment argument of 1. The checks
; expect 32 buffer_load_ubyte and 32 buffer_store_byte instructions, in any
; relative order, before the final s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Operands: dest, src, length = 32, alignment = 1, trailing i1 = false.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte global-to-global memcpy with an alignment argument of 2. The checks
; expect 16 buffer_load_ushort and 16 buffer_store_short instructions, in any
; relative order, before the final s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort

; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short

; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Operands: dest, src, length = 32, alignment = 2, trailing i1 = false.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte global-to-global memcpy with an alignment argument of 4. The checks
; are ordered: two buffer_load_dwordx4, two buffer_store_dwordx4, s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Operands: dest, src, length = 32, alignment = 4, trailing i1 = false.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte global-to-global memcpy with an alignment argument of 8. The checks
; are ordered: two buffer_load_dwordx4, two buffer_store_dwordx4, s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Operands: dest, src, length = 32, alignment = 8, trailing i1 = false.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte global-to-global memcpy with an alignment argument of 16. The checks
; are ordered: two buffer_load_dwordx4, two buffer_store_dwordx4, s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Operands: dest, src, length = 32, alignment = 16, trailing i1 = false.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
  ret void
}