; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2,-slow-unaligned-mem-16 | FileCheck %s --check-prefix=SSE2FAST
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; https://llvm.org/bugs/show_bug.cgi?id=27100
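; Test that a memset of a nonzero byte value is expanded inline: scalar 8-byte
; stores when unaligned 16-byte accesses are slow (SSE), unaligned xmm stores
; otherwise (SSE2FAST), and unaligned ymm stores with AVX.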

define void @memset_16_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_16_nonzero_bytes:
; SSE: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_16_nonzero_bytes:
; SSE2FAST: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_16_nonzero_bytes:
; AVX: vmovaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %xmm0, (%rdi)
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 16, i64 -1)
  ret void
}

define void @memset_32_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_32_nonzero_bytes:
; SSE: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_32_nonzero_bytes:
; SSE2FAST: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_32_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 32, i64 -1)
  ret void
}

define void @memset_64_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_64_nonzero_bytes:
; SSE: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 56(%rdi)
; SSE-NEXT: movq %rax, 48(%rdi)
; SSE-NEXT: movq %rax, 40(%rdi)
; SSE-NEXT: movq %rax, 32(%rdi)
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_64_nonzero_bytes:
; SSE2FAST: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_64_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 64, i64 -1)
  ret void
}

define void @memset_128_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_128_nonzero_bytes:
; SSE: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 120(%rdi)
; SSE-NEXT: movq %rax, 112(%rdi)
; SSE-NEXT: movq %rax, 104(%rdi)
; SSE-NEXT: movq %rax, 96(%rdi)
; SSE-NEXT: movq %rax, 88(%rdi)
; SSE-NEXT: movq %rax, 80(%rdi)
; SSE-NEXT: movq %rax, 72(%rdi)
; SSE-NEXT: movq %rax, 64(%rdi)
; SSE-NEXT: movq %rax, 56(%rdi)
; SSE-NEXT: movq %rax, 48(%rdi)
; SSE-NEXT: movq %rax, 40(%rdi)
; SSE-NEXT: movq %rax, 32(%rdi)
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_128_nonzero_bytes:
; SSE2FAST: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_128_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 96(%rdi)
; AVX-NEXT: vmovups %ymm0, 64(%rdi)
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 128, i64 -1)
  ret void
}

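; When unaligned 16-byte accesses are slow, a 256-byte memset is not expanded
; inline; it is lowered to a call to the library memset instead.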
define void @memset_256_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_256_nonzero_bytes:
; SSE: pushq %rax
; SSE-NEXT: .Ltmp0:
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movl $42, %esi
; SSE-NEXT: movl $256, %edx # imm = 0x100
; SSE-NEXT: callq memset
; SSE-NEXT: popq %rax
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_256_nonzero_bytes:
; SSE2FAST: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 240(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 224(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 208(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 192(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 176(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 160(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 144(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 128(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_256_nonzero_bytes:
; AVX: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 224(%rdi)
; AVX-NEXT: vmovups %ymm0, 192(%rdi)
; AVX-NEXT: vmovups %ymm0, 160(%rdi)
; AVX-NEXT: vmovups %ymm0, 128(%rdi)
; AVX-NEXT: vmovups %ymm0, 96(%rdi)
; AVX-NEXT: vmovups %ymm0, 64(%rdi)
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 256, i64 -1)
  ret void
}

declare i8* @__memset_chk(i8*, i32, i64, i64)
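; Note: the final i64 argument of __memset_chk is the object size; -1
; (SIZE_MAX) means the size is unknown, so the fortified call can be lowered
; like a plain memset.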

; Repeat with a non-constant value for the stores.
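; The variable byte must be splatted first: the SSE lowering zero-extends it
; and multiplies by 0x0101010101010101 to fill a GPR; SSE2FAST shuffles it
; across an xmm register; AVX2 uses vpbroadcastb.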

define void @memset_16_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_16_nonconst_bytes:
; SSE: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_16_nonconst_bytes:
; SSE2FAST: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_16_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_16_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 16, i32 1, i1 false)
  ret void
}

define void @memset_32_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_32_nonconst_bytes:
; SSE: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_32_nonconst_bytes:
; SSE2FAST: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_32_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_32_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 32, i32 1, i1 false)
  ret void
}

define void @memset_64_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_64_nonconst_bytes:
; SSE: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 56(%rdi)
; SSE-NEXT: movq %rcx, 48(%rdi)
; SSE-NEXT: movq %rcx, 40(%rdi)
; SSE-NEXT: movq %rcx, 32(%rdi)
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_64_nonconst_bytes:
; SSE2FAST: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_64_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_64_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i32 1, i1 false)
  ret void
}

define void @memset_128_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_128_nonconst_bytes:
; SSE: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 120(%rdi)
; SSE-NEXT: movq %rcx, 112(%rdi)
; SSE-NEXT: movq %rcx, 104(%rdi)
; SSE-NEXT: movq %rcx, 96(%rdi)
; SSE-NEXT: movq %rcx, 88(%rdi)
; SSE-NEXT: movq %rcx, 80(%rdi)
; SSE-NEXT: movq %rcx, 72(%rdi)
; SSE-NEXT: movq %rcx, 64(%rdi)
; SSE-NEXT: movq %rcx, 56(%rdi)
; SSE-NEXT: movq %rcx, 48(%rdi)
; SSE-NEXT: movq %rcx, 40(%rdi)
; SSE-NEXT: movq %rcx, 32(%rdi)
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_128_nonconst_bytes:
; SSE2FAST: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT: movdqu %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_128_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_128_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i32 1, i1 false)
  ret void
}

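; As with the constant case, the slow-unaligned SSE target does not expand a
; 256-byte variable memset inline; it becomes a tail call to memset.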
define void @memset_256_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_256_nonconst_bytes:
; SSE: movl $256, %edx # imm = 0x100
; SSE-NEXT: jmp memset # TAILCALL
;
; SSE2FAST-LABEL: memset_256_nonconst_bytes:
; SSE2FAST: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT: movdqu %xmm0, 240(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 224(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 208(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 192(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 176(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 160(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 144(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 128(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_256_nonconst_bytes:
; AVX1: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_256_nonconst_bytes:
; AVX2: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 224(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 192(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 160(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 128(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i32 1, i1 false)
  ret void
}

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1