; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; memcpy intrinsics called by the functions in this file: one variant takes
; addrspace(3) pointers with an i32 length, the other takes addrspace(1)
; pointers with an i64 length. Both take an i32 alignment and an i1 flag.
declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind


; 32-byte memcpy between two addrspace(3) pointers with alignment 1.
; The directives below expect 32 single-byte DS read/write pairs (four groups
; of eight) in strictly alternating order, then the end of the program.
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align1
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8

; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8

; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8

; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Arguments: dest, source, length 32, alignment 1, i1 flag false.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(3) pointers with alignment 2.
; The directives below expect 16 two-byte DS read/write pairs (two groups of
; eight) in strictly alternating order, then the end of the program.
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align2
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16

; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16
; SI: DS_READ_U16
; SI: DS_WRITE_B16

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Arguments: dest, source, length 32, alignment 2, i1 flag false.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(3) pointers with alignment 4.
; The directives below expect four-byte DS reads and writes in any relative
; order. 32 bytes in 4-byte accesses is eight read/write pairs, so exactly
; eight pairs are listed; a ninth identical DAG directive could only be
; satisfied when FileCheck permits overlapping DAG matches.
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align4
; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Arguments: dest, source, length 32, alignment 4, i1 flag false.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(3) pointers with alignment 8.
; The directives below still expect four-byte DS reads and writes, in any
; relative order. 32 bytes in 4-byte accesses is eight read/write pairs, so
; exactly eight pairs are listed. The end-of-program check is an ordered
; check, as in the other functions of this file, so it must follow the
; memory operations.
; FIXME: Use 64-bit ops
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align8

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Arguments: dest, source, length 32, alignment 8, i1 flag false.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(1) pointers with alignment 1.
; The directives below expect 32 byte-sized buffer load/store pairs (four
; groups of eight) in any relative order, then the end of the program.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align1
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Arguments: dest, source, length 32, alignment 1, i1 flag false.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(1) pointers with alignment 2.
; The directives below expect 16 two-byte buffer load/store pairs (two groups
; of eight) in any relative order, then the end of the program.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align2
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT

; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_STORE_SHORT

; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Arguments: dest, source, length 32, alignment 2, i1 flag false.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(1) pointers with alignment 4.
; The directives below expect two four-dword buffer load/store pairs in
; load, store, load, store order, then the end of the program.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Arguments: dest, source, length 32, alignment 4, i1 flag false.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(1) pointers with alignment 8.
; The directives below expect two four-dword buffer load/store pairs in
; load, store, load, store order, then the end of the program.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align8
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Arguments: dest, source, length 32, alignment 8, i1 flag false.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(1) pointers with alignment 16.
; The directives below expect two four-dword buffer load/store pairs in
; load, store, load, store order, then the end of the program.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align16
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Arguments: dest, source, length 32, alignment 16, i1 flag false.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
  ret void
}