; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=ANY --check-prefix=SSE2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=ANY --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=ANY --check-prefix=AVX --check-prefix=AVX2

; https://llvm.org/bugs/show_bug.cgi?id=27100
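;
; A fortified memset (__memset_chk) with a constant size and a constant
; nonzero byte value should be expanded into inline stores of the splatted
; value (42 == 0x2A repeated in every byte gives the immediates below)
; rather than a library call, up to a target-dependent size threshold.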

define void @memset_16_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_16_nonzero_bytes:
; SSE2:       movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE2-NEXT:  movq %rax, 8(%rdi)
; SSE2-NEXT:  movq %rax, (%rdi)
; SSE2-NEXT:  retq
;
; AVX1-LABEL: memset_16_nonzero_bytes:
; AVX1:       vmovaps {{.*#+}} xmm0 = [707406378,707406378,707406378,707406378]
; AVX1-NEXT:  vmovups %xmm0, (%rdi)
; AVX1-NEXT:  retq
;
; AVX2-LABEL: memset_16_nonzero_bytes:
; AVX2:       vbroadcastss {{.*}}(%rip), %xmm0
; AVX2-NEXT:  vmovups %xmm0, (%rdi)
; AVX2-NEXT:  retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 16, i64 -1)
  ret void
}

define void @memset_32_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_32_nonzero_bytes:
; SSE2:       movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE2-NEXT:  movq %rax, 24(%rdi)
; SSE2-NEXT:  movq %rax, 16(%rdi)
; SSE2-NEXT:  movq %rax, 8(%rdi)
; SSE2-NEXT:  movq %rax, (%rdi)
; SSE2-NEXT:  retq
;
; AVX1-LABEL: memset_32_nonzero_bytes:
; AVX1:       vmovaps {{.*#+}} ymm0 = [1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13]
; AVX1-NEXT:  vmovups %ymm0, (%rdi)
; AVX1-NEXT:  vzeroupper
; AVX1-NEXT:  retq
;
; AVX2-LABEL: memset_32_nonzero_bytes:
; AVX2:       vbroadcastss {{.*}}(%rip), %ymm0
; AVX2-NEXT:  vmovups %ymm0, (%rdi)
; AVX2-NEXT:  vzeroupper
; AVX2-NEXT:  retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 32, i64 -1)
  ret void
}

define void @memset_64_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_64_nonzero_bytes:
; SSE2:       movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE2-NEXT:  movq %rax, 56(%rdi)
; SSE2-NEXT:  movq %rax, 48(%rdi)
; SSE2-NEXT:  movq %rax, 40(%rdi)
; SSE2-NEXT:  movq %rax, 32(%rdi)
; SSE2-NEXT:  movq %rax, 24(%rdi)
; SSE2-NEXT:  movq %rax, 16(%rdi)
; SSE2-NEXT:  movq %rax, 8(%rdi)
; SSE2-NEXT:  movq %rax, (%rdi)
; SSE2-NEXT:  retq
;
; AVX1-LABEL: memset_64_nonzero_bytes:
; AVX1:       vmovaps {{.*#+}} ymm0 = [1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13]
; AVX1-NEXT:  vmovups %ymm0, 32(%rdi)
; AVX1-NEXT:  vmovups %ymm0, (%rdi)
; AVX1-NEXT:  vzeroupper
; AVX1-NEXT:  retq
;
; AVX2-LABEL: memset_64_nonzero_bytes:
; AVX2:       vbroadcastss {{.*}}(%rip), %ymm0
; AVX2-NEXT:  vmovups %ymm0, 32(%rdi)
; AVX2-NEXT:  vmovups %ymm0, (%rdi)
; AVX2-NEXT:  vzeroupper
; AVX2-NEXT:  retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 64, i64 -1)
  ret void
}

define void @memset_128_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_128_nonzero_bytes:
; SSE2:       movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE2-NEXT:  movq %rax, 120(%rdi)
; SSE2-NEXT:  movq %rax, 112(%rdi)
; SSE2-NEXT:  movq %rax, 104(%rdi)
; SSE2-NEXT:  movq %rax, 96(%rdi)
; SSE2-NEXT:  movq %rax, 88(%rdi)
; SSE2-NEXT:  movq %rax, 80(%rdi)
; SSE2-NEXT:  movq %rax, 72(%rdi)
; SSE2-NEXT:  movq %rax, 64(%rdi)
; SSE2-NEXT:  movq %rax, 56(%rdi)
; SSE2-NEXT:  movq %rax, 48(%rdi)
; SSE2-NEXT:  movq %rax, 40(%rdi)
; SSE2-NEXT:  movq %rax, 32(%rdi)
; SSE2-NEXT:  movq %rax, 24(%rdi)
; SSE2-NEXT:  movq %rax, 16(%rdi)
; SSE2-NEXT:  movq %rax, 8(%rdi)
; SSE2-NEXT:  movq %rax, (%rdi)
; SSE2-NEXT:  retq
;
; AVX1-LABEL: memset_128_nonzero_bytes:
; AVX1:       vmovaps {{.*#+}} ymm0 = [1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13]
; AVX1-NEXT:  vmovups %ymm0, 96(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 64(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 32(%rdi)
; AVX1-NEXT:  vmovups %ymm0, (%rdi)
; AVX1-NEXT:  vzeroupper
; AVX1-NEXT:  retq
;
; AVX2-LABEL: memset_128_nonzero_bytes:
; AVX2:       vbroadcastss {{.*}}(%rip), %ymm0
; AVX2-NEXT:  vmovups %ymm0, 96(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 64(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 32(%rdi)
; AVX2-NEXT:  vmovups %ymm0, (%rdi)
; AVX2-NEXT:  vzeroupper
; AVX2-NEXT:  retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 128, i64 -1)
  ret void
}

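; At 256 bytes, SSE2 falls back to calling the library memset, while the
; wider AVX stores keep the expansion inline.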
define void @memset_256_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_256_nonzero_bytes:
; SSE2:       pushq %rax
; SSE2-NEXT:  .Ltmp0:
; SSE2-NEXT:  .cfi_def_cfa_offset 16
; SSE2-NEXT:  movl $42, %esi
; SSE2-NEXT:  movl $256, %edx # imm = 0x100
; SSE2-NEXT:  callq memset
; SSE2-NEXT:  popq %rax
; SSE2-NEXT:  retq
;
; AVX1-LABEL: memset_256_nonzero_bytes:
; AVX1:       vmovaps {{.*#+}} ymm0 = [1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13,1.511366e-13]
; AVX1-NEXT:  vmovups %ymm0, 224(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 192(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 160(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 128(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 96(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 64(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 32(%rdi)
; AVX1-NEXT:  vmovups %ymm0, (%rdi)
; AVX1-NEXT:  vzeroupper
; AVX1-NEXT:  retq
;
; AVX2-LABEL: memset_256_nonzero_bytes:
; AVX2:       vbroadcastss {{.*}}(%rip), %ymm0
; AVX2-NEXT:  vmovups %ymm0, 224(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 192(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 160(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 128(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 96(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 64(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 32(%rdi)
; AVX2-NEXT:  vmovups %ymm0, (%rdi)
; AVX2-NEXT:  vzeroupper
; AVX2-NEXT:  retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 256, i64 -1)
  ret void
}

declare i8* @__memset_chk(i8*, i32, i64, i64)
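; The final argument of __memset_chk is the destination object size; passing
; -1 marks it unknown, which allows the call to be treated as a plain memset.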

; Repeat with a non-constant value for the stores.
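; Without a compile-time byte value, the splat is built at run time: the
; zero-extended byte is multiplied by 0x0101010101010101 (scalar) or
; 0x01010101 (per vector lane) to replicate it into every byte before storing.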

define void @memset_16_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_16_nonconst_bytes:
; SSE2:       movzbl %sil, %eax
; SSE2-NEXT:  movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:  imulq %rax, %rcx
; SSE2-NEXT:  movq %rcx, 8(%rdi)
; SSE2-NEXT:  movq %rcx, (%rdi)
; SSE2-NEXT:  retq
;
; AVX1-LABEL: memset_16_nonconst_bytes:
; AVX1:       movzbl %sil, %eax
; AVX1-NEXT:  imull $16843009, %eax, %eax # imm = 0x1010101
; AVX1-NEXT:  vmovd %eax, %xmm0
; AVX1-NEXT:  vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:  vmovdqu %xmm0, (%rdi)
; AVX1-NEXT:  retq
;
; AVX2-LABEL: memset_16_nonconst_bytes:
; AVX2:       movzbl %sil, %eax
; AVX2-NEXT:  imull $16843009, %eax, %eax # imm = 0x1010101
; AVX2-NEXT:  vmovd %eax, %xmm0
; AVX2-NEXT:  vbroadcastss %xmm0, %xmm0
; AVX2-NEXT:  vmovups %xmm0, (%rdi)
; AVX2-NEXT:  retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 16, i32 1, i1 false)
  ret void
}

define void @memset_32_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_32_nonconst_bytes:
; SSE2:       movzbl %sil, %eax
; SSE2-NEXT:  movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:  imulq %rax, %rcx
; SSE2-NEXT:  movq %rcx, 24(%rdi)
; SSE2-NEXT:  movq %rcx, 16(%rdi)
; SSE2-NEXT:  movq %rcx, 8(%rdi)
; SSE2-NEXT:  movq %rcx, (%rdi)
; SSE2-NEXT:  retq
;
; AVX1-LABEL: memset_32_nonconst_bytes:
; AVX1:       movzbl %sil, %eax
; AVX1-NEXT:  imull $16843009, %eax, %eax # imm = 0x1010101
; AVX1-NEXT:  vmovd %eax, %xmm0
; AVX1-NEXT:  vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:  vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:  vmovups %ymm0, (%rdi)
; AVX1-NEXT:  vzeroupper
; AVX1-NEXT:  retq
;
; AVX2-LABEL: memset_32_nonconst_bytes:
; AVX2:       movzbl %sil, %eax
; AVX2-NEXT:  imull $16843009, %eax, %eax # imm = 0x1010101
; AVX2-NEXT:  vmovd %eax, %xmm0
; AVX2-NEXT:  vbroadcastss %xmm0, %ymm0
; AVX2-NEXT:  vmovups %ymm0, (%rdi)
; AVX2-NEXT:  vzeroupper
; AVX2-NEXT:  retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 32, i32 1, i1 false)
  ret void
}

define void @memset_64_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_64_nonconst_bytes:
; SSE2:       movzbl %sil, %eax
; SSE2-NEXT:  movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:  imulq %rax, %rcx
; SSE2-NEXT:  movq %rcx, 56(%rdi)
; SSE2-NEXT:  movq %rcx, 48(%rdi)
; SSE2-NEXT:  movq %rcx, 40(%rdi)
; SSE2-NEXT:  movq %rcx, 32(%rdi)
; SSE2-NEXT:  movq %rcx, 24(%rdi)
; SSE2-NEXT:  movq %rcx, 16(%rdi)
; SSE2-NEXT:  movq %rcx, 8(%rdi)
; SSE2-NEXT:  movq %rcx, (%rdi)
; SSE2-NEXT:  retq
;
; AVX1-LABEL: memset_64_nonconst_bytes:
; AVX1:       movzbl %sil, %eax
; AVX1-NEXT:  imull $16843009, %eax, %eax # imm = 0x1010101
; AVX1-NEXT:  vmovd %eax, %xmm0
; AVX1-NEXT:  vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:  vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:  vmovups %ymm0, 32(%rdi)
; AVX1-NEXT:  vmovups %ymm0, (%rdi)
; AVX1-NEXT:  vzeroupper
; AVX1-NEXT:  retq
;
; AVX2-LABEL: memset_64_nonconst_bytes:
; AVX2:       movzbl %sil, %eax
; AVX2-NEXT:  imull $16843009, %eax, %eax # imm = 0x1010101
; AVX2-NEXT:  vmovd %eax, %xmm0
; AVX2-NEXT:  vbroadcastss %xmm0, %ymm0
; AVX2-NEXT:  vmovups %ymm0, 32(%rdi)
; AVX2-NEXT:  vmovups %ymm0, (%rdi)
; AVX2-NEXT:  vzeroupper
; AVX2-NEXT:  retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i32 1, i1 false)
  ret void
}

define void @memset_128_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_128_nonconst_bytes:
; SSE2:       movzbl %sil, %eax
; SSE2-NEXT:  movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:  imulq %rax, %rcx
; SSE2-NEXT:  movq %rcx, 120(%rdi)
; SSE2-NEXT:  movq %rcx, 112(%rdi)
; SSE2-NEXT:  movq %rcx, 104(%rdi)
; SSE2-NEXT:  movq %rcx, 96(%rdi)
; SSE2-NEXT:  movq %rcx, 88(%rdi)
; SSE2-NEXT:  movq %rcx, 80(%rdi)
; SSE2-NEXT:  movq %rcx, 72(%rdi)
; SSE2-NEXT:  movq %rcx, 64(%rdi)
; SSE2-NEXT:  movq %rcx, 56(%rdi)
; SSE2-NEXT:  movq %rcx, 48(%rdi)
; SSE2-NEXT:  movq %rcx, 40(%rdi)
; SSE2-NEXT:  movq %rcx, 32(%rdi)
; SSE2-NEXT:  movq %rcx, 24(%rdi)
; SSE2-NEXT:  movq %rcx, 16(%rdi)
; SSE2-NEXT:  movq %rcx, 8(%rdi)
; SSE2-NEXT:  movq %rcx, (%rdi)
; SSE2-NEXT:  retq
;
; AVX1-LABEL: memset_128_nonconst_bytes:
; AVX1:       movzbl %sil, %eax
; AVX1-NEXT:  imull $16843009, %eax, %eax # imm = 0x1010101
; AVX1-NEXT:  vmovd %eax, %xmm0
; AVX1-NEXT:  vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:  vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:  vmovups %ymm0, 96(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 64(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 32(%rdi)
; AVX1-NEXT:  vmovups %ymm0, (%rdi)
; AVX1-NEXT:  vzeroupper
; AVX1-NEXT:  retq
;
; AVX2-LABEL: memset_128_nonconst_bytes:
; AVX2:       movzbl %sil, %eax
; AVX2-NEXT:  imull $16843009, %eax, %eax # imm = 0x1010101
; AVX2-NEXT:  vmovd %eax, %xmm0
; AVX2-NEXT:  vbroadcastss %xmm0, %ymm0
; AVX2-NEXT:  vmovups %ymm0, 96(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 64(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 32(%rdi)
; AVX2-NEXT:  vmovups %ymm0, (%rdi)
; AVX2-NEXT:  vzeroupper
; AVX2-NEXT:  retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i32 1, i1 false)
  ret void
}

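; With a non-constant value and 256 bytes, SSE2 tail-calls memset: %rdi and
; %esi already hold the destination and value, so only the size is set up.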
define void @memset_256_nonconst_bytes(i8* %x, i8 %c) {
; SSE2-LABEL: memset_256_nonconst_bytes:
; SSE2:       movl $256, %edx # imm = 0x100
; SSE2-NEXT:  jmp memset # TAILCALL
;
; AVX1-LABEL: memset_256_nonconst_bytes:
; AVX1:       movzbl %sil, %eax
; AVX1-NEXT:  imull $16843009, %eax, %eax # imm = 0x1010101
; AVX1-NEXT:  vmovd %eax, %xmm0
; AVX1-NEXT:  vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:  vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:  vmovups %ymm0, 224(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 192(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 160(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 128(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 96(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 64(%rdi)
; AVX1-NEXT:  vmovups %ymm0, 32(%rdi)
; AVX1-NEXT:  vmovups %ymm0, (%rdi)
; AVX1-NEXT:  vzeroupper
; AVX1-NEXT:  retq
;
; AVX2-LABEL: memset_256_nonconst_bytes:
; AVX2:       movzbl %sil, %eax
; AVX2-NEXT:  imull $16843009, %eax, %eax # imm = 0x1010101
; AVX2-NEXT:  vmovd %eax, %xmm0
; AVX2-NEXT:  vbroadcastss %xmm0, %ymm0
; AVX2-NEXT:  vmovups %ymm0, 224(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 192(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 160(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 128(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 96(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 64(%rdi)
; AVX2-NEXT:  vmovups %ymm0, 32(%rdi)
; AVX2-NEXT:  vmovups %ymm0, (%rdi)
; AVX2-NEXT:  vzeroupper
; AVX2-NEXT:  retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i32 1, i1 false)
  ret void
}

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1