blob: 7b4db55155eb38b204a180aa59fed84cb09cfc4d [file] [log] [blame]
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
Matt Arsenault46645fa2014-07-28 17:49:26 +00003
; memcpy intrinsic overloads exercised by this file. Each takes
; (dest, src, length, i32, i1); the tests pass their alignment as the i32
; operand and `false` as the i1 operand.

; addrspace(3) -> addrspace(3), i32 length.
declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
; addrspace(1) -> addrspace(1), i64 length.
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
; addrspace(2) source -> addrspace(1) destination, i64 length.
declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i32, i1) nounwind
Matt Arsenault46645fa2014-07-28 17:49:26 +00007
8
; 32-byte memcpy between addrspace(3) pointers with alignment 1.
; The checks expect 32 ds_read_u8 and 32 ds_write_b8 instructions
; (order-independent -DAG matches), followed by s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Length 32, alignment operand 1.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
  ret void
}
89
; 32-byte memcpy between addrspace(3) pointers with alignment 2.
; The checks expect 16 ds_read_u16 and 16 ds_write_b16 instructions
; (order-independent -DAG matches), followed by s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16

; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16

; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16

; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16

; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Length 32, alignment operand 2.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
  ret void
}
134
; 32-byte memcpy between addrspace(3) pointers with alignment 4.
; The checks expect four ds_read2_b32 followed by four ds_write2_b32
; (ordered matches), then s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
; SI: ds_read2_b32
; SI: ds_read2_b32
; SI: ds_read2_b32
; SI: ds_read2_b32

; SI: ds_write2_b32
; SI: ds_write2_b32
; SI: ds_write2_b32
; SI: ds_write2_b32

; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Length 32, alignment operand 4.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
  ret void
}
153
; 32-byte memcpy between addrspace(3) pointers with alignment 8.
; The checks expect two ds_read2_b64 followed by two ds_write2_b64
; (ordered matches), then s_endpgm.
; FIXME: Use 64-bit ops
; NOTE(review): the checks below already expect the _b64 forms, so the FIXME
; above looks stale -- confirm and drop it.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:

; SI: ds_read2_b64
; SI: ds_read2_b64

; SI: ds_write2_b64
; SI: ds_write2_b64

; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; Length 32, alignment operand 8.
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
  ret void
}
170
; 32-byte memcpy between addrspace(1) pointers with alignment 1.
; The checks expect 32 buffer_load_ubyte and 32 buffer_store_byte
; instructions (order-independent -DAG matches), followed by s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Length 32, alignment operand 1.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
  ret void
}
247
; 32-byte memcpy between addrspace(1) pointers with alignment 2.
; The checks expect 16 buffer_load_ushort and 16 buffer_store_short
; instructions (order-independent -DAG matches), followed by s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort

; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short

; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Length 32, alignment operand 2.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
  ret void
}
290
; 32-byte memcpy between addrspace(1) pointers with alignment 4.
; The checks expect two buffer_load_dwordx4 followed by two
; buffer_store_dwordx4 (ordered matches), then s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Length 32, alignment operand 4.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
  ret void
}
303
; 32-byte memcpy between addrspace(1) pointers with alignment 8.
; The checks expect two buffer_load_dwordx4 followed by two
; buffer_store_dwordx4 (ordered matches), then s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Length 32, alignment operand 8.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
  ret void
}
316
; 32-byte memcpy between addrspace(1) pointers with alignment 16.
; The checks expect two buffer_load_dwordx4 followed by two
; buffer_store_dwordx4 (ordered matches), then s_endpgm.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so both i64 pointers are bitcast first.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; Length 32, alignment operand 16.
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
  ret void
}
Matt Arsenault749035b2016-07-30 01:40:36 +0000329
; Test shouldConvertConstantLoadToIntImm
; Two copies of the same 16-byte, NUL-terminated string in addrspace(2),
; differing only in declared alignment (4 vs. 1).
@hello.align4 = private unnamed_addr addrspace(2) constant [16 x i8] c"constant string\00", align 4
@hello.align1 = private unnamed_addr addrspace(2) constant [16 x i8] c"constant string\00", align 1
333
; memcpy from the 4-byte-aligned constant string @hello.align4 into an
; addrspace(1) destination. The checks expect a PC-relative address
; computation (s_getpc_b64 / s_add_u32 / s_addc_u32), then scalar loads and
; two buffer_store_dwordx4 (order-independent -DAG matches).
; NOTE(review): the memcpy length is 32 but @hello.align4 is declared as
; [16 x i8], so the source range extends past the object -- confirm this is
; intended before changing either side; the checks depend on the length.
; FUNC-LABEL: {{^}}test_memcpy_const_string_align4:
; SI: s_getpc_b64
; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, hello.align4+20
; SI: s_addc_u32
; SI-DAG: s_load_dwordx4
; SI-DAG: s_load_dwordx4
; SI-DAG: s_load_dwordx2
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind {
  ; View the [16 x i8] global as an i8 pointer for the intrinsic.
  %str = bitcast [16 x i8] addrspace(2)* @hello.align4 to i8 addrspace(2)*
  ; Length 32, alignment operand 4.
  call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i32 4, i1 false)
  ret void
}
348
; memcpy from the 1-byte-aligned constant string @hello.align1 into an
; addrspace(1) destination. The checks require that no buffer_load appears
; before the first match, then expect an immediate move of 0x69 followed by
; 16 buffer_store_byte instructions (ordered matches).
; NOTE(review): 0x69 is ASCII 'i'; presumably a byte of the string that is
; materialized as an immediate instead of being loaded -- confirm.
; NOTE(review): the memcpy length is 32 but @hello.align1 is declared as
; [16 x i8], so the source range extends past the object -- confirm this is
; intended before changing either side; the checks depend on the length.
; FUNC-LABEL: {{^}}test_memcpy_const_string_align1:
; SI-NOT: buffer_load
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x69
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
define amdgpu_kernel void @test_memcpy_const_string_align1(i8 addrspace(1)* noalias %out) nounwind {
  ; View the [16 x i8] global as an i8 pointer for the intrinsic.
  %str = bitcast [16 x i8] addrspace(2)* @hello.align1 to i8 addrspace(2)*
  ; Length 32, alignment operand 1.
  call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i32 1, i1 false)
  ret void
}