; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse | FileCheck %s --check-prefix=ANY --check-prefix=SSE --check-prefix=SSE1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=ANY --check-prefix=SSE --check-prefix=SSE2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=ANY --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=ANY --check-prefix=AVX --check-prefix=AVX2

; https://llvm.org/bugs/show_bug.cgi?id=27100

; 16-byte memset of 0x2A: SSE uses two 8-byte scalar stores of the splatted
; constant; AVX uses one unaligned 16-byte vector store.
define void @memset_16_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_16_nonzero_bytes:
; SSE: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: memset_16_nonzero_bytes:
; AVX: vmovaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %xmm0, (%rdi)
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 16, i64 -1)
  ret void
}

; 32-byte memset of 0x2A: SSE lowers to four scalar stores; AVX uses a single
; unaligned 32-byte ymm store (followed by vzeroupper per the SysV AVX ABI).
define void @memset_32_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_32_nonzero_bytes:
; SSE: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: memset_32_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 32, i64 -1)
  ret void
}

; 64-byte memset of 0x2A: SSE lowers to eight scalar stores; AVX uses two
; unaligned 32-byte ymm stores.
define void @memset_64_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_64_nonzero_bytes:
; SSE: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 56(%rdi)
; SSE-NEXT: movq %rax, 48(%rdi)
; SSE-NEXT: movq %rax, 40(%rdi)
; SSE-NEXT: movq %rax, 32(%rdi)
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: memset_64_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 64, i64 -1)
  ret void
}

; 128-byte memset of 0x2A: SSE lowers to sixteen scalar stores; AVX uses four
; unaligned 32-byte ymm stores.
define void @memset_128_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_128_nonzero_bytes:
; SSE: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 120(%rdi)
; SSE-NEXT: movq %rax, 112(%rdi)
; SSE-NEXT: movq %rax, 104(%rdi)
; SSE-NEXT: movq %rax, 96(%rdi)
; SSE-NEXT: movq %rax, 88(%rdi)
; SSE-NEXT: movq %rax, 80(%rdi)
; SSE-NEXT: movq %rax, 72(%rdi)
; SSE-NEXT: movq %rax, 64(%rdi)
; SSE-NEXT: movq %rax, 56(%rdi)
; SSE-NEXT: movq %rax, 48(%rdi)
; SSE-NEXT: movq %rax, 40(%rdi)
; SSE-NEXT: movq %rax, 32(%rdi)
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: memset_128_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 96(%rdi)
; AVX-NEXT: vmovups %ymm0, 64(%rdi)
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 128, i64 -1)
  ret void
}

; 256-byte memset of 0x2A: past the inline-expansion threshold for SSE, which
; falls back to calling memset; AVX still inlines with eight ymm stores.
define void @memset_256_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_256_nonzero_bytes:
; SSE: pushq %rax
; SSE-NEXT: .Ltmp0:
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movl $42, %esi
; SSE-NEXT: movl $256, %edx # imm = 0x100
; SSE-NEXT: callq memset
; SSE-NEXT: popq %rax
; SSE-NEXT: retq
;
; AVX-LABEL: memset_256_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 224(%rdi)
; AVX-NEXT: vmovups %ymm0, 192(%rdi)
; AVX-NEXT: vmovups %ymm0, 160(%rdi)
; AVX-NEXT: vmovups %ymm0, 128(%rdi)
; AVX-NEXT: vmovups %ymm0, 96(%rdi)
; AVX-NEXT: vmovups %ymm0, 64(%rdi)
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 256, i64 -1)
  ret void
}

declare i8* @__memset_chk(i8*, i32, i64, i64)

; Repeat the tests above with a non-constant (runtime) byte value for the stores.

; 16-byte memset of a runtime byte: SSE splats via 0x0101... multiply; AVX1
; broadcasts with vpshufb against a zero mask; AVX2 uses vpbroadcastb.
define void @memset_16_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_16_nonconst_bytes:
; SSE: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: memset_16_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_16_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 16, i32 1, i1 false)
  ret void
}

; 32-byte memset of a runtime byte: AVX1 widens the splatted xmm to ymm with
; vinsertf128; AVX2 broadcasts directly into ymm.
define void @memset_32_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_32_nonconst_bytes:
; SSE: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: memset_32_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_32_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 32, i32 1, i1 false)
  ret void
}

; 64-byte memset of a runtime byte: scalar splat + eight movq for SSE; two
; 32-byte ymm stores for AVX1/AVX2.
define void @memset_64_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_64_nonconst_bytes:
; SSE: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 56(%rdi)
; SSE-NEXT: movq %rcx, 48(%rdi)
; SSE-NEXT: movq %rcx, 40(%rdi)
; SSE-NEXT: movq %rcx, 32(%rdi)
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: memset_64_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_64_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i32 1, i1 false)
  ret void
}

; 128-byte memset of a runtime byte: sixteen scalar stores for SSE; four
; 32-byte ymm stores for AVX1/AVX2.
define void @memset_128_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_128_nonconst_bytes:
; SSE: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 120(%rdi)
; SSE-NEXT: movq %rcx, 112(%rdi)
; SSE-NEXT: movq %rcx, 104(%rdi)
; SSE-NEXT: movq %rcx, 96(%rdi)
; SSE-NEXT: movq %rcx, 88(%rdi)
; SSE-NEXT: movq %rcx, 80(%rdi)
; SSE-NEXT: movq %rcx, 72(%rdi)
; SSE-NEXT: movq %rcx, 64(%rdi)
; SSE-NEXT: movq %rcx, 56(%rdi)
; SSE-NEXT: movq %rcx, 48(%rdi)
; SSE-NEXT: movq %rcx, 40(%rdi)
; SSE-NEXT: movq %rcx, 32(%rdi)
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: memset_128_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_128_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i32 1, i1 false)
  ret void
}

; 256-byte memset of a runtime byte: SSE tail-calls memset (args already in
; place, so just set the size and jmp); AVX still inlines with eight ymm stores.
define void @memset_256_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_256_nonconst_bytes:
; SSE: movl $256, %edx # imm = 0x100
; SSE-NEXT: jmp memset # TAILCALL
;
; AVX1-LABEL: memset_256_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_256_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 224(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 192(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 160(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 128(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i32 1, i1 false)
  ret void
}

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1