; Lowering test for small, fixed-size llvm.memcpy calls. Each function below
; copies 32 bytes between two pointers in the same address space
; (addrspace(3) or addrspace(1)) at a given alignment, and the check lines
; pin the load/store instructions expected in the llc output.
; blob: cd8b532a79267abd3478fdb7b5679ba2ad710a99
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; memcpy overloads used by the tests: i32 length for addrspace(3) pointers,
; i64 length for addrspace(1) pointers. The fourth operand carries the
; alignment that each test name refers to.
declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind


; 32-byte memcpy between addrspace(3) pointers with alignment 1.
; Expected lowering: 32 DS_READ_U8 and 32 DS_WRITE_B8 in total. The first
; 16 read/write pairs are checked interleaved; the remaining 16 reads are
; checked as one run, followed by the remaining 16 writes.
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align1
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8

; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8

; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8

; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Copy 32 bytes from %in to %out with alignment 1.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte memcpy between addrspace(3) pointers with alignment 2.
; Expected lowering: 16 DS_READ_U16 and 16 DS_WRITE_B16 in total. The first
; 8 read/write pairs are checked interleaved; the remaining 8 reads are
; checked as one run, followed by the remaining 8 writes.
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align2
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16

; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16

; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Copy 32 bytes from %in to %out with alignment 2.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte memcpy between addrspace(3) pointers with alignment 4.
; Expected lowering: DS_READ_B32 / DS_WRITE_B32 pairs, matched in any order.
; 32 bytes in 4-byte accesses is 8 reads and 8 writes, so exactly 8 pairs
; are listed; a ninth pair could only be satisfied by re-matching text that
; an earlier identical directive had already matched.
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align4
; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Copy 32 bytes from %in to %out with alignment 4.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte memcpy between addrspace(3) pointers with alignment 8.
; The checks currently expect the same 32-bit DS_READ_B32 / DS_WRITE_B32
; pairs as the alignment-4 case, matched in any order: 32 bytes in 4-byte
; accesses is 8 reads and 8 writes, so exactly 8 pairs are listed.
; The end-of-program check is ordered (not part of the unordered group) so
; it must follow every load/store match, as in the other tests in this file.
; FIXME: Use 64-bit ops
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align8

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Copy 32 bytes from %in to %out with alignment 8.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte memcpy between addrspace(1) pointers with alignment 1.
; Expected lowering: 32 BUFFER_LOAD_UBYTE and 32 BUFFER_STORE_BYTE, matched
; in any order (listed below as four groups of 8 load/store pairs).
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align1
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Copy 32 bytes from %in to %out with alignment 1.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte memcpy between addrspace(1) pointers with alignment 2.
; Expected lowering: 16 BUFFER_LOAD_USHORT and 16 BUFFER_STORE_SHORT, matched
; in any order (listed below as two groups of 8 load/store pairs).
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align2
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT

; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT

; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Copy 32 bytes from %in to %out with alignment 2.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte memcpy between addrspace(1) pointers with alignment 4.
; Expected lowering: BUFFER_LOAD_DWORDX4 then BUFFER_STORE_DWORDX4, twice,
; checked in that exact order.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Copy 32 bytes from %in to %out with alignment 4.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte memcpy between addrspace(1) pointers with alignment 8.
; Expected lowering: BUFFER_LOAD_DWORDX4 then BUFFER_STORE_DWORDX4, twice,
; checked in that exact order.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align8
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Copy 32 bytes from %in to %out with alignment 8.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte memcpy between addrspace(1) pointers with alignment 16.
; Expected lowering: BUFFER_LOAD_DWORDX4 then BUFFER_STORE_DWORDX4, twice,
; checked in that exact order.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align16
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Copy 32 bytes from %in to %out with alignment 16.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
  ret void
}