; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Exercises the expansion of small, constant-size (32-byte) llvm.memcpy calls
; on addrspace(3) and addrspace(1) pointers at several alignments.

declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind


; 32-byte copy between addrspace(3) pointers with 1-byte alignment.
; Expected expansion: 32 single-byte reads and 32 single-byte writes, in the
; interleaving spelled out below.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_write_b8

; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_write_b8

; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_read_u8


; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Arguments: dest, src, length = 32 bytes, alignment = 1, not volatile.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte copy between addrspace(3) pointers with 2-byte alignment.
; Expected expansion: 16 halfword reads followed by 16 halfword writes.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16

; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16

; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16

; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Arguments: dest, src, length = 32 bytes, alignment = 2, not volatile.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte copy between addrspace(3) pointers with 4-byte alignment.
; Expected expansion: eight dword reads and eight dword writes (32 / 4), in
; any relative order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Arguments: dest, src, length = 32 bytes, alignment = 4, not volatile.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte copy between addrspace(3) pointers with 8-byte alignment.
; The checks currently expect the same eight dword reads and eight dword
; writes as the 4-byte-aligned case.
; FIXME: Use 64-bit ops
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Arguments: dest, src, length = 32 bytes, alignment = 8, not volatile.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte copy between addrspace(1) pointers with 1-byte alignment.
; Expected expansion: 32 byte loads and 32 byte stores, in any relative order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Arguments: dest, src, length = 32 bytes, alignment = 1, not volatile.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte copy between addrspace(1) pointers with 2-byte alignment.
; Expected expansion: 16 halfword loads and 16 halfword stores, in any
; relative order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort

; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short

; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Arguments: dest, src, length = 32 bytes, alignment = 2, not volatile.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte copy between addrspace(1) pointers with 4-byte alignment.
; Expected expansion: two 4-dword loads followed by two 4-dword stores.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Arguments: dest, src, length = 32 bytes, alignment = 4, not volatile.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte copy between addrspace(1) pointers with 8-byte alignment.
; Expected expansion: two 4-dword loads followed by two 4-dword stores.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Arguments: dest, src, length = 32 bytes, alignment = 8, not volatile.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte copy between addrspace(1) pointers with 16-byte alignment.
; Expected expansion: two 4-dword loads followed by two 4-dword stores.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Arguments: dest, src, length = 32 bytes, alignment = 16, not volatile.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
  ret void
}