; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Instruction-selection test for small, constant-size llvm.memcpy calls.
; Every function below copies 32 bytes and varies only the address space
; (3 or 1) and the alignment operand; the checks pin the width and number
; of memory instructions emitted for each combination.

; Intrinsic operands: destination, source, byte count, alignment, isvolatile.
; Address space 3 variant with a 32-bit length.
declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
; Address space 1 variant with a 64-bit length.
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind


; 32-byte copy between two address space 3 pointers with alignment 1.
; Expects the copy to be expanded into 32 byte-sized reads and 32 byte-sized
; writes. These are ordered checks, so the read/write sequence below is the
; exact order the instructions must appear in.
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align1
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8

; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8

; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_READ_U8


; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8

; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8

; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8

; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; 32 bytes, alignment 1, not volatile.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte copy between two address space 3 pointers with alignment 2.
; Expects 16 two-byte reads followed by 16 two-byte writes (ordered checks).
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align2
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16

; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16

; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16

; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; 32 bytes, alignment 2, not volatile.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte copy between two address space 3 pointers with alignment 4.
; 32 bytes / 4 bytes per access = 8 dword reads and 8 dword writes. The
; checks are DAG-style, so the relative order of reads and writes is not
; constrained; only the program terminator must come after them.
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align4
; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; 32 bytes, alignment 4, not volatile.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte copy between two address space 3 pointers with alignment 8.
; Currently expected to use the same 8 dword reads and 8 dword writes as the
; alignment-4 case (32 bytes / 4 bytes per access). The read/write checks are
; DAG-style, so their relative order is not constrained.
; FIXME: Use 64-bit ops
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align8

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; 32 bytes, alignment 8, not volatile.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte copy between two address space 1 pointers with alignment 1.
; Expects 32 byte loads and 32 byte stores. The checks are DAG-style, so the
; interleaving of loads and stores is not constrained; only the program
; terminator must come after them.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align1
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; 32 bytes, alignment 1, not volatile.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte copy between two address space 1 pointers with alignment 2.
; Expects 16 two-byte loads and 16 two-byte stores. The checks are DAG-style,
; so the relative order of loads and stores is not constrained.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align2
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT

; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT

; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; 32 bytes, alignment 2, not volatile.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte copy between two address space 1 pointers with alignment 4.
; Expects two 4-dword loads followed by two 4-dword stores (ordered checks).
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; 32 bytes, alignment 4, not volatile.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte copy between two address space 1 pointers with alignment 8.
; Expects two 4-dword loads followed by two 4-dword stores (ordered checks).
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align8
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; 32 bytes, alignment 8, not volatile.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte copy between two address space 1 pointers with alignment 16.
; Expects two 4-dword loads followed by two 4-dword stores (ordered checks).
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align16
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are cast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; 32 bytes, alignment 16, not volatile.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
  ret void
}