; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2,-slow-unaligned-mem-16 | FileCheck %s --check-prefix=SSE2FAST
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; https://llvm.org/bugs/show_bug.cgi?id=27100

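; This file checks how a memset of a nonzero (42) fill byte is expanded.
; With plain SSE/SSE2 (slow unaligned 16-byte accesses), the byte is repeated
; into a 64-bit immediate and stored 8 bytes at a time; with
; -slow-unaligned-mem-16 cleared (SSE2FAST), a splatted XMM register is stored
; with movups; AVX widens that to 32-byte YMM stores. At 256 bytes the
; plain-SSE configuration falls back to calling memset.
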
define void @memset_16_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_16_nonzero_bytes:
; SSE: # BB#0:
; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_16_nonzero_bytes:
; SSE2FAST: # BB#0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_16_nonzero_bytes:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %xmm0, (%rdi)
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 16, i64 -1)
  ret void
}

define void @memset_32_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_32_nonzero_bytes:
; SSE: # BB#0:
; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_32_nonzero_bytes:
; SSE2FAST: # BB#0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_32_nonzero_bytes:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 32, i64 -1)
  ret void
}

define void @memset_64_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_64_nonzero_bytes:
; SSE: # BB#0:
; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 56(%rdi)
; SSE-NEXT: movq %rax, 48(%rdi)
; SSE-NEXT: movq %rax, 40(%rdi)
; SSE-NEXT: movq %rax, 32(%rdi)
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_64_nonzero_bytes:
; SSE2FAST: # BB#0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_64_nonzero_bytes:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 64, i64 -1)
  ret void
}

define void @memset_128_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_128_nonzero_bytes:
; SSE: # BB#0:
; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 120(%rdi)
; SSE-NEXT: movq %rax, 112(%rdi)
; SSE-NEXT: movq %rax, 104(%rdi)
; SSE-NEXT: movq %rax, 96(%rdi)
; SSE-NEXT: movq %rax, 88(%rdi)
; SSE-NEXT: movq %rax, 80(%rdi)
; SSE-NEXT: movq %rax, 72(%rdi)
; SSE-NEXT: movq %rax, 64(%rdi)
; SSE-NEXT: movq %rax, 56(%rdi)
; SSE-NEXT: movq %rax, 48(%rdi)
; SSE-NEXT: movq %rax, 40(%rdi)
; SSE-NEXT: movq %rax, 32(%rdi)
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_128_nonzero_bytes:
; SSE2FAST: # BB#0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_128_nonzero_bytes:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 96(%rdi)
; AVX-NEXT: vmovups %ymm0, 64(%rdi)
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 128, i64 -1)
  ret void
}

define void @memset_256_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_256_nonzero_bytes:
; SSE: # BB#0:
; SSE-NEXT: pushq %rax
; SSE-NEXT: .Ltmp0:
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movl $42, %esi
; SSE-NEXT: movl $256, %edx # imm = 0x100
; SSE-NEXT: callq memset
; SSE-NEXT: popq %rax
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_256_nonzero_bytes:
; SSE2FAST: # BB#0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 240(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 224(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 208(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 192(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 176(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 160(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 144(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 128(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_256_nonzero_bytes:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, 224(%rdi)
; AVX-NEXT: vmovups %ymm0, 192(%rdi)
; AVX-NEXT: vmovups %ymm0, 160(%rdi)
; AVX-NEXT: vmovups %ymm0, 128(%rdi)
; AVX-NEXT: vmovups %ymm0, 96(%rdi)
; AVX-NEXT: vmovups %ymm0, 64(%rdi)
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 256, i64 -1)
  ret void
}

declare i8* @__memset_chk(i8*, i32, i64, i64)

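; __memset_chk is the fortified form of memset; the trailing i64 -1 "object
; size" argument means the destination size is unknown, so these calls can be
; treated like plain memset calls with a constant length.
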
; Repeat with a non-constant value for the stores.

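; With a runtime fill byte the splat itself must be synthesized: the scalar
; expansion multiplies the zero-extended byte by 0x0101010101010101, SSE2
; builds the vector with punpcklbw/pshuflw/pshufd, AVX1 shuffles against a
; zero index vector with vpshufb, and AVX2 uses vpbroadcastb.
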
define void @memset_16_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_16_nonconst_bytes:
; SSE: # BB#0:
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_16_nonconst_bytes:
; SSE2FAST: # BB#0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_16_nonconst_bytes:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_16_nonconst_bytes:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 16, i32 1, i1 false)
  ret void
}

define void @memset_32_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_32_nonconst_bytes:
; SSE: # BB#0:
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_32_nonconst_bytes:
; SSE2FAST: # BB#0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_32_nonconst_bytes:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_32_nonconst_bytes:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 32, i32 1, i1 false)
  ret void
}

define void @memset_64_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_64_nonconst_bytes:
; SSE: # BB#0:
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 56(%rdi)
; SSE-NEXT: movq %rcx, 48(%rdi)
; SSE-NEXT: movq %rcx, 40(%rdi)
; SSE-NEXT: movq %rcx, 32(%rdi)
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_64_nonconst_bytes:
; SSE2FAST: # BB#0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_64_nonconst_bytes:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_64_nonconst_bytes:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i32 1, i1 false)
  ret void
}

define void @memset_128_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_128_nonconst_bytes:
; SSE: # BB#0:
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 120(%rdi)
; SSE-NEXT: movq %rcx, 112(%rdi)
; SSE-NEXT: movq %rcx, 104(%rdi)
; SSE-NEXT: movq %rcx, 96(%rdi)
; SSE-NEXT: movq %rcx, 88(%rdi)
; SSE-NEXT: movq %rcx, 80(%rdi)
; SSE-NEXT: movq %rcx, 72(%rdi)
; SSE-NEXT: movq %rcx, 64(%rdi)
; SSE-NEXT: movq %rcx, 56(%rdi)
; SSE-NEXT: movq %rcx, 48(%rdi)
; SSE-NEXT: movq %rcx, 40(%rdi)
; SSE-NEXT: movq %rcx, 32(%rdi)
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_128_nonconst_bytes:
; SSE2FAST: # BB#0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT: movdqu %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_128_nonconst_bytes:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_128_nonconst_bytes:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i32 1, i1 false)
  ret void
}

define void @memset_256_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_256_nonconst_bytes:
; SSE: # BB#0:
; SSE-NEXT: movl $256, %edx # imm = 0x100
; SSE-NEXT: jmp memset # TAILCALL
;
; SSE2FAST-LABEL: memset_256_nonconst_bytes:
; SSE2FAST: # BB#0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT: movdqu %xmm0, 240(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 224(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 208(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 192(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 176(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 160(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 144(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 128(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_256_nonconst_bytes:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_256_nonconst_bytes:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 224(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 192(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 160(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 128(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i32 1, i1 false)
  ret void
}

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1