; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=ANY --check-prefix=SSE2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=ANY --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=ANY --check-prefix=AVX --check-prefix=AVX2

; https://llvm.org/bugs/show_bug.cgi?id=27100

; A 16-byte __memset_chk of a constant nonzero byte should be inlined as stores.
define void @memset_16_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_16_nonzero_bytes:
; SSE2: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE2-NEXT: movq %rax, 8(%rdi)
; SSE2-NEXT: movq %rax, (%rdi)
; SSE2-NEXT: retq
;
; AVX1-LABEL: memset_16_nonzero_bytes:
; AVX1: vmovaps {{.*#+}} xmm0 = [707406378,707406378,707406378,707406378]
; AVX1-NEXT: vmovups %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_16_nonzero_bytes:
; AVX2: vbroadcastss {{.*}}(%rip), %xmm0
; AVX2-NEXT: vmovups %xmm0, (%rdi)
; AVX2-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 16, i64 -1)
  ret void
}

; A 32-byte __memset_chk of a constant nonzero byte should be inlined as stores.
define void @memset_32_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_32_nonzero_bytes:
; SSE2: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE2-NEXT: movq %rax, 24(%rdi)
; SSE2-NEXT: movq %rax, 16(%rdi)
; SSE2-NEXT: movq %rax, 8(%rdi)
; SSE2-NEXT: movq %rax, (%rdi)
; SSE2-NEXT: retq
;
; AVX-LABEL: memset_32_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 32, i64 -1)
  ret void
}

; A 64-byte __memset_chk of a constant nonzero byte should be inlined as stores.
define void @memset_64_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_64_nonzero_bytes:
; SSE2: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE2-NEXT: movq %rax, 56(%rdi)
; SSE2-NEXT: movq %rax, 48(%rdi)
; SSE2-NEXT: movq %rax, 40(%rdi)
; SSE2-NEXT: movq %rax, 32(%rdi)
; SSE2-NEXT: movq %rax, 24(%rdi)
; SSE2-NEXT: movq %rax, 16(%rdi)
; SSE2-NEXT: movq %rax, 8(%rdi)
; SSE2-NEXT: movq %rax, (%rdi)
; SSE2-NEXT: retq
;
; AVX-LABEL: memset_64_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 64, i64 -1)
  ret void
}

; A 128-byte __memset_chk of a constant nonzero byte should be inlined as stores.
define void @memset_128_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_128_nonzero_bytes:
; SSE2: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE2-NEXT: movq %rax, 120(%rdi)
; SSE2-NEXT: movq %rax, 112(%rdi)
; SSE2-NEXT: movq %rax, 104(%rdi)
; SSE2-NEXT: movq %rax, 96(%rdi)
; SSE2-NEXT: movq %rax, 88(%rdi)
; SSE2-NEXT: movq %rax, 80(%rdi)
; SSE2-NEXT: movq %rax, 72(%rdi)
; SSE2-NEXT: movq %rax, 64(%rdi)
; SSE2-NEXT: movq %rax, 56(%rdi)
; SSE2-NEXT: movq %rax, 48(%rdi)
; SSE2-NEXT: movq %rax, 40(%rdi)
; SSE2-NEXT: movq %rax, 32(%rdi)
; SSE2-NEXT: movq %rax, 24(%rdi)
; SSE2-NEXT: movq %rax, 16(%rdi)
; SSE2-NEXT: movq %rax, 8(%rdi)
; SSE2-NEXT: movq %rax, (%rdi)
; SSE2-NEXT: retq
;
; AVX-LABEL: memset_128_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 96(%rdi)
; AVX-NEXT: vmovups %ymm0, 64(%rdi)
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 128, i64 -1)
  ret void
}

; At 256 bytes, scalar lowering gives up and calls the libc memset; AVX still inlines.
define void @memset_256_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_256_nonzero_bytes:
; SSE2: pushq %rax
; SSE2-NEXT: .Ltmp0:
; SSE2-NEXT: .cfi_def_cfa_offset 16
; SSE2-NEXT: movl $42, %esi
; SSE2-NEXT: movl $256, %edx # imm = 0x100
; SSE2-NEXT: callq memset
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: memset_256_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 224(%rdi)
; AVX-NEXT: vmovups %ymm0, 192(%rdi)
; AVX-NEXT: vmovups %ymm0, 160(%rdi)
; AVX-NEXT: vmovups %ymm0, 128(%rdi)
; AVX-NEXT: vmovups %ymm0, 96(%rdi)
; AVX-NEXT: vmovups %ymm0, 64(%rdi)
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 256, i64 -1)
  ret void
}

; Checked-memset runtime entry point: (dst, value, size, object-size bound).
declare i8* @__memset_chk(i8*, i32, i64, i64)

; Repeat with a non-constant value for the stores.

; 16-byte memset of a variable byte; the value is splatted by multiply or broadcast.
define void @memset_16_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_16_nonconst_bytes:
; SSE2: movzbl %sil, %eax
; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT: imulq %rax, %rcx
; SSE2-NEXT: movq %rcx, 8(%rdi)
; SSE2-NEXT: movq %rcx, (%rdi)
; SSE2-NEXT: retq
;
; AVX1-LABEL: memset_16_nonconst_bytes:
; AVX1: movzbl %sil, %eax
; AVX1-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_16_nonconst_bytes:
; AVX2: movzbl %sil, %eax
; AVX2-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
; AVX2-NEXT: vmovups %xmm0, (%rdi)
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 16, i32 1, i1 false)
  ret void
}

; 32-byte memset of a variable byte; AVX splats the byte into a ymm register.
define void @memset_32_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_32_nonconst_bytes:
; SSE2: movzbl %sil, %eax
; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT: imulq %rax, %rcx
; SSE2-NEXT: movq %rcx, 24(%rdi)
; SSE2-NEXT: movq %rcx, 16(%rdi)
; SSE2-NEXT: movq %rcx, 8(%rdi)
; SSE2-NEXT: movq %rcx, (%rdi)
; SSE2-NEXT: retq
;
; AVX1-LABEL: memset_32_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_32_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 32, i32 1, i1 false)
  ret void
}

; 64-byte memset of a variable byte; two 32-byte vector stores with AVX.
define void @memset_64_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_64_nonconst_bytes:
; SSE2: movzbl %sil, %eax
; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT: imulq %rax, %rcx
; SSE2-NEXT: movq %rcx, 56(%rdi)
; SSE2-NEXT: movq %rcx, 48(%rdi)
; SSE2-NEXT: movq %rcx, 40(%rdi)
; SSE2-NEXT: movq %rcx, 32(%rdi)
; SSE2-NEXT: movq %rcx, 24(%rdi)
; SSE2-NEXT: movq %rcx, 16(%rdi)
; SSE2-NEXT: movq %rcx, 8(%rdi)
; SSE2-NEXT: movq %rcx, (%rdi)
; SSE2-NEXT: retq
;
; AVX1-LABEL: memset_64_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_64_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i32 1, i1 false)
  ret void
}

; 128-byte memset of a variable byte; four 32-byte vector stores with AVX.
define void @memset_128_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_128_nonconst_bytes:
; SSE2: movzbl %sil, %eax
; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT: imulq %rax, %rcx
; SSE2-NEXT: movq %rcx, 120(%rdi)
; SSE2-NEXT: movq %rcx, 112(%rdi)
; SSE2-NEXT: movq %rcx, 104(%rdi)
; SSE2-NEXT: movq %rcx, 96(%rdi)
; SSE2-NEXT: movq %rcx, 88(%rdi)
; SSE2-NEXT: movq %rcx, 80(%rdi)
; SSE2-NEXT: movq %rcx, 72(%rdi)
; SSE2-NEXT: movq %rcx, 64(%rdi)
; SSE2-NEXT: movq %rcx, 56(%rdi)
; SSE2-NEXT: movq %rcx, 48(%rdi)
; SSE2-NEXT: movq %rcx, 40(%rdi)
; SSE2-NEXT: movq %rcx, 32(%rdi)
; SSE2-NEXT: movq %rcx, 24(%rdi)
; SSE2-NEXT: movq %rcx, 16(%rdi)
; SSE2-NEXT: movq %rcx, 8(%rdi)
; SSE2-NEXT: movq %rcx, (%rdi)
; SSE2-NEXT: retq
;
; AVX1-LABEL: memset_128_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_128_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i32 1, i1 false)
  ret void
}

; At 256 bytes with a variable value, SSE2 tail-calls libc memset; AVX still inlines.
define void @memset_256_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_256_nonconst_bytes:
; SSE2: movl $256, %edx # imm = 0x100
; SSE2-NEXT: jmp memset # TAILCALL
;
; AVX1-LABEL: memset_256_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_256_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 224(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 192(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 160(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 128(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i32 1, i1 false)
  ret void
}

; LLVM memset intrinsic (old 5-arg form: dst, value, length, alignment, isvolatile).
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
