; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=ANY --check-prefix=SSE2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=ANY --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=ANY --check-prefix=AVX --check-prefix=AVX2

; https://llvm.org/bugs/show_bug.cgi?id=27100

; 16-byte memset(x, 42, 16) via __memset_chk: expanded inline (two 8-byte
; stores on SSE2, one unaligned 16-byte vector store on AVX).
define void @memset_16_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_16_nonzero_bytes:
; SSE2: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE2-NEXT: movq %rax, 8(%rdi)
; SSE2-NEXT: movq %rax, (%rdi)
; SSE2-NEXT: retq
;
; AVX-LABEL: memset_16_nonzero_bytes:
; AVX: vmovaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %xmm0, (%rdi)
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 16, i64 -1)
  ret void
}

; 32-byte memset(x, 42, 32): four scalar stores on SSE2, a single unaligned
; 32-byte ymm store on AVX (note the vzeroupper before returning).
define void @memset_32_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_32_nonzero_bytes:
; SSE2: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE2-NEXT: movq %rax, 24(%rdi)
; SSE2-NEXT: movq %rax, 16(%rdi)
; SSE2-NEXT: movq %rax, 8(%rdi)
; SSE2-NEXT: movq %rax, (%rdi)
; SSE2-NEXT: retq
;
; AVX-LABEL: memset_32_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 32, i64 -1)
  ret void
}

; 64-byte memset(x, 42, 64): eight scalar stores on SSE2, two 32-byte ymm
; stores on AVX.
define void @memset_64_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_64_nonzero_bytes:
; SSE2: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE2-NEXT: movq %rax, 56(%rdi)
; SSE2-NEXT: movq %rax, 48(%rdi)
; SSE2-NEXT: movq %rax, 40(%rdi)
; SSE2-NEXT: movq %rax, 32(%rdi)
; SSE2-NEXT: movq %rax, 24(%rdi)
; SSE2-NEXT: movq %rax, 16(%rdi)
; SSE2-NEXT: movq %rax, 8(%rdi)
; SSE2-NEXT: movq %rax, (%rdi)
; SSE2-NEXT: retq
;
; AVX-LABEL: memset_64_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 64, i64 -1)
  ret void
}

; 128-byte memset(x, 42, 128): sixteen scalar stores on SSE2, four 32-byte
; ymm stores on AVX — still fully inlined, no libcall.
define void @memset_128_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_128_nonzero_bytes:
; SSE2: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE2-NEXT: movq %rax, 120(%rdi)
; SSE2-NEXT: movq %rax, 112(%rdi)
; SSE2-NEXT: movq %rax, 104(%rdi)
; SSE2-NEXT: movq %rax, 96(%rdi)
; SSE2-NEXT: movq %rax, 88(%rdi)
; SSE2-NEXT: movq %rax, 80(%rdi)
; SSE2-NEXT: movq %rax, 72(%rdi)
; SSE2-NEXT: movq %rax, 64(%rdi)
; SSE2-NEXT: movq %rax, 56(%rdi)
; SSE2-NEXT: movq %rax, 48(%rdi)
; SSE2-NEXT: movq %rax, 40(%rdi)
; SSE2-NEXT: movq %rax, 32(%rdi)
; SSE2-NEXT: movq %rax, 24(%rdi)
; SSE2-NEXT: movq %rax, 16(%rdi)
; SSE2-NEXT: movq %rax, 8(%rdi)
; SSE2-NEXT: movq %rax, (%rdi)
; SSE2-NEXT: retq
;
; AVX-LABEL: memset_128_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 96(%rdi)
; AVX-NEXT: vmovups %ymm0, 64(%rdi)
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 128, i64 -1)
  ret void
}

; 256-byte memset(x, 42, 256): past the SSE2 inlining threshold, so SSE2
; falls back to calling memset; AVX still inlines as eight ymm stores.
define void @memset_256_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_256_nonzero_bytes:
; SSE2: pushq %rax
; SSE2-NEXT: .Ltmp0:
; SSE2-NEXT: .cfi_def_cfa_offset 16
; SSE2-NEXT: movl $42, %esi
; SSE2-NEXT: movl $256, %edx # imm = 0x100
; SSE2-NEXT: callq memset
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: memset_256_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 224(%rdi)
; AVX-NEXT: vmovups %ymm0, 192(%rdi)
; AVX-NEXT: vmovups %ymm0, 160(%rdi)
; AVX-NEXT: vmovups %ymm0, 128(%rdi)
; AVX-NEXT: vmovups %ymm0, 96(%rdi)
; AVX-NEXT: vmovups %ymm0, 64(%rdi)
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 256, i64 -1)
  ret void
}

; Checked-memset libcall (glibc fortify interface); the last i64 is the
; destination object size (-1 = unknown).
declare i8* @__memset_chk(i8*, i32, i64, i64)

; Repeat with a non-constant value for the stores.

; 16-byte memset with a variable byte value: SSE2 splats via the
; 0x0101.. multiply trick; AVX1 splats with vpshufb, AVX2 with vpbroadcastb.
define void @memset_16_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_16_nonconst_bytes:
; SSE2: movzbl %sil, %eax
; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT: imulq %rax, %rcx
; SSE2-NEXT: movq %rcx, 8(%rdi)
; SSE2-NEXT: movq %rcx, (%rdi)
; SSE2-NEXT: retq
;
; AVX1-LABEL: memset_16_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_16_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 16, i32 1, i1 false)
  ret void
}

; 32-byte memset with a variable byte value: AVX1 widens the xmm splat to
; ymm with vinsertf128; AVX2 broadcasts directly to ymm.
define void @memset_32_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_32_nonconst_bytes:
; SSE2: movzbl %sil, %eax
; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT: imulq %rax, %rcx
; SSE2-NEXT: movq %rcx, 24(%rdi)
; SSE2-NEXT: movq %rcx, 16(%rdi)
; SSE2-NEXT: movq %rcx, 8(%rdi)
; SSE2-NEXT: movq %rcx, (%rdi)
; SSE2-NEXT: retq
;
; AVX1-LABEL: memset_32_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_32_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 32, i32 1, i1 false)
  ret void
}

; 64-byte memset with a variable byte value: same splat strategies as the
; 32-byte case, with two 32-byte vector stores.
define void @memset_64_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_64_nonconst_bytes:
; SSE2: movzbl %sil, %eax
; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT: imulq %rax, %rcx
; SSE2-NEXT: movq %rcx, 56(%rdi)
; SSE2-NEXT: movq %rcx, 48(%rdi)
; SSE2-NEXT: movq %rcx, 40(%rdi)
; SSE2-NEXT: movq %rcx, 32(%rdi)
; SSE2-NEXT: movq %rcx, 24(%rdi)
; SSE2-NEXT: movq %rcx, 16(%rdi)
; SSE2-NEXT: movq %rcx, 8(%rdi)
; SSE2-NEXT: movq %rcx, (%rdi)
; SSE2-NEXT: retq
;
; AVX1-LABEL: memset_64_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_64_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i32 1, i1 false)
  ret void
}

; 128-byte memset with a variable byte value: sixteen scalar stores on SSE2,
; four 32-byte vector stores on AVX1/AVX2.
define void @memset_128_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_128_nonconst_bytes:
; SSE2: movzbl %sil, %eax
; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT: imulq %rax, %rcx
; SSE2-NEXT: movq %rcx, 120(%rdi)
; SSE2-NEXT: movq %rcx, 112(%rdi)
; SSE2-NEXT: movq %rcx, 104(%rdi)
; SSE2-NEXT: movq %rcx, 96(%rdi)
; SSE2-NEXT: movq %rcx, 88(%rdi)
; SSE2-NEXT: movq %rcx, 80(%rdi)
; SSE2-NEXT: movq %rcx, 72(%rdi)
; SSE2-NEXT: movq %rcx, 64(%rdi)
; SSE2-NEXT: movq %rcx, 56(%rdi)
; SSE2-NEXT: movq %rcx, 48(%rdi)
; SSE2-NEXT: movq %rcx, 40(%rdi)
; SSE2-NEXT: movq %rcx, 32(%rdi)
; SSE2-NEXT: movq %rcx, 24(%rdi)
; SSE2-NEXT: movq %rcx, 16(%rdi)
; SSE2-NEXT: movq %rcx, 8(%rdi)
; SSE2-NEXT: movq %rcx, (%rdi)
; SSE2-NEXT: retq
;
; AVX1-LABEL: memset_128_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_128_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i32 1, i1 false)
  ret void
}

; 256-byte memset with a variable byte value: SSE2 tail-calls memset (jmp,
; no ret); AVX1/AVX2 still inline as eight 32-byte vector stores.
define void @memset_256_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_256_nonconst_bytes:
; SSE2: movl $256, %edx # imm = 0x100
; SSE2-NEXT: jmp memset # TAILCALL
;
; AVX1-LABEL: memset_256_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_256_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 224(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 192(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 160(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 128(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i32 1, i1 false)
  ret void
}

; llvm.memset intrinsic (pre-LLVM-7 signature: explicit i32 alignment arg).
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
