; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -O2 | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl -O2 | FileCheck %s --check-prefix=AVX512NOTDQ

define void @load_v8i1_broadcast_4_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v8i1_broadcast_4_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb (%rdi), %k0
; AVX512-NEXT:    kshiftrb $4, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT:    kmovd %eax, %k0
; AVX512NOTDQ-NEXT:    kshiftrw $4, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vpsllq $63, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <8 x i1>, <8 x i1>* %a0
    %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<2 x i32><i32 4,i32 4>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
define void @load_v8i1_broadcast_7_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v8i1_broadcast_7_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb (%rdi), %k0
; AVX512-NEXT:    kshiftrb $6, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT:    kmovd %eax, %k0
; AVX512NOTDQ-NEXT:    kshiftrw $6, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512NOTDQ-NEXT:    vpsllq $63, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <8 x i1>, <8 x i1>* %a0
    %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<2 x i32><i32 7,i32 7>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
define void @load_v16i1_broadcast_8_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v16i1_broadcast_8_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovw (%rdi), %k0
; AVX512-NEXT:    kshiftrw $8, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrw $8, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vpsllq $63, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <16 x i1>, <16 x i1>* %a0
    %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<2 x i32><i32 8,i32 8>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
define void @load_v16i1_broadcast_8_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v16i1_broadcast_8_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovw (%rdi), %k0
; AVX512-NEXT:    kshiftrw $8, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrw $8, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <16 x i1>, <16 x i1>* %a0
    %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<4 x i32><i32 8,i32 8,i32 8,i32 8>
    %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
    store <4 x float> %d2, <4 x float>* %a3
    ret void
}
define void @load_v16i1_broadcast_15_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v16i1_broadcast_15_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovw (%rdi), %k0
; AVX512-NEXT:    kshiftrw $14, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrw $14, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512NOTDQ-NEXT:    vpsllq $63, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <16 x i1>, <16 x i1>* %a0
    %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<2 x i32><i32 15,i32 15>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
define void @load_v16i1_broadcast_15_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v16i1_broadcast_15_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovw (%rdi), %k0
; AVX512-NEXT:    kshiftrw $12, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrw $12, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512NOTDQ-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <16 x i1>, <16 x i1>* %a0
    %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<4 x i32><i32 15,i32 15,i32 15,i32 15>
    %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
    store <4 x float> %d2, <4 x float>* %a3
    ret void
}
define void @load_v32i1_broadcast_16_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_16_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd (%rdi), %k0
; AVX512-NEXT:    kshiftrd $16, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrd $16, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vpsllq $63, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <32 x i1>, <32 x i1>* %a0
    %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<2 x i32><i32 16,i32 16>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
define void @load_v32i1_broadcast_16_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_16_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd (%rdi), %k0
; AVX512-NEXT:    kshiftrd $16, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrd $16, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <32 x i1>, <32 x i1>* %a0
    %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<4 x i32><i32 16,i32 16,i32 16,i32 16>
    %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
    store <4 x float> %d2, <4 x float>* %a3
    ret void
}
define void @load_v32i1_broadcast_16_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_16_v8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd (%rdi), %k0
; AVX512-NEXT:    kshiftrd $16, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512-NEXT:    vpmovd2m %ymm2, %k1
; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrd $16, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512NOTDQ-NEXT:    vpslld $31, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <32 x i1>, <32 x i1>* %a0
    %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<8 x i32><i32 16,i32 16,i32 16,i32 16,i32 16,i32 16,i32 16,i32 16>
    %d2 = select <8 x i1> %d1, <8 x float> %a1, <8 x float> %a2
    store <8 x float> %d2, <8 x float>* %a3
    ret void
}
define void @load_v32i1_broadcast_31_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_31_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd (%rdi), %k0
; AVX512-NEXT:    kshiftrd $30, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrd $30, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512NOTDQ-NEXT:    vpsllq $63, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <32 x i1>, <32 x i1>* %a0
    %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<2 x i32><i32 31,i32 31>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
define void @load_v32i1_broadcast_31_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_31_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd (%rdi), %k0
; AVX512-NEXT:    kshiftrd $28, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrd $28, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512NOTDQ-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <32 x i1>, <32 x i1>* %a0
    %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<4 x i32><i32 31,i32 31,i32 31,i32 31>
    %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
    store <4 x float> %d2, <4 x float>* %a3
    ret void
}
define void @load_v32i1_broadcast_31_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_31_v8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd (%rdi), %k0
; AVX512-NEXT:    kshiftrd $24, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm2
; AVX512-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
; AVX512-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
; AVX512-NEXT:    vpmovd2m %ymm2, %k1
; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrd $24, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
; AVX512NOTDQ-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
; AVX512NOTDQ-NEXT:    vpslld $31, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <32 x i1>, <32 x i1>* %a0
    %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<8 x i32><i32 31,i32 31,i32 31,i32 31,i32 31,i32 31,i32 31,i32 31>
    %d2 = select <8 x i1> %d1, <8 x float> %a1, <8 x float> %a2
    store <8 x float> %d2, <8 x float>* %a3
    ret void
}
define void @load_v64i1_broadcast_32_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_32_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $32, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vpsllq $63, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<2 x i32><i32 32,i32 32>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
define void @load_v64i1_broadcast_32_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_32_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $32, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<4 x i32><i32 32,i32 32,i32 32,i32 32>
    %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
    store <4 x float> %d2, <4 x float>* %a3
    ret void
}
define void @load_v64i1_broadcast_32_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_32_v8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $32, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512-NEXT:    vpmovd2m %ymm2, %k1
; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512NOTDQ-NEXT:    vpslld $31, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<8 x i32><i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32>
    %d2 = select <8 x i1> %d1, <8 x float> %a1, <8 x float> %a2
    store <8 x float> %d2, <8 x float>* %a3
    ret void
}
define void @load_v64i1_broadcast_32_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x float> %a2,<16 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_32_v16i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $32, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %zmm2
; AVX512-NEXT:    vpmovd2m %zmm2, %k1
; AVX512-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; AVX512-NEXT:    vmovaps %zmm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v16i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %zmm2
; AVX512NOTDQ-NEXT:    vpslld $31, %zmm2, %zmm2
; AVX512NOTDQ-NEXT:    vptestmd %zmm2, %zmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %zmm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<16 x i32><i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32>
    %d2 = select <16 x i1> %d1, <16 x float> %a1, <16 x float> %a2
    store <16 x float> %d2, <16 x float>* %a3
    ret void
}
define void @load_v64i1_broadcast_63_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_63_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $62, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $62, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512NOTDQ-NEXT:    vpsllq $63, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<2 x i32><i32 63,i32 63>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
define void @load_v64i1_broadcast_63_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_63_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $60, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $60, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512NOTDQ-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<4 x i32><i32 63,i32 63,i32 63,i32 63>
    %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
    store <4 x float> %d2, <4 x float>* %a3
    ret void
}
define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_63_v8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $56, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm2
; AVX512-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
; AVX512-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
; AVX512-NEXT:    vpmovd2m %ymm2, %k1
; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $56, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
; AVX512NOTDQ-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
; AVX512NOTDQ-NEXT:    vpslld $31, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<8 x i32><i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63>
    %d2 = select <8 x i1> %d1, <8 x float> %a1, <8 x float> %a2
    store <8 x float> %d2, <8 x float>* %a3
    ret void
}
define void @load_v64i1_broadcast_63_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x float> %a2,<16 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_63_v16i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $48, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm2
; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512-NEXT:    vpermd %zmm2, %zmm3, %zmm2
; AVX512-NEXT:    vpmovd2m %zmm2, %k1
; AVX512-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; AVX512-NEXT:    vmovaps %zmm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v16i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $48, %k0, %k1
; AVX512NOTDQ-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512NOTDQ-NEXT:    vpermd %zmm2, %zmm3, %zmm2
; AVX512NOTDQ-NEXT:    vpslld $31, %zmm2, %zmm2
; AVX512NOTDQ-NEXT:    vptestmd %zmm2, %zmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %zmm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<16 x i32><i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63>
    %d2 = select <16 x i1> %d1, <16 x float> %a1, <16 x float> %a2
    store <16 x float> %d2, <16 x float>* %a3
    ret void
}
623define void @load_v2i1_broadcast_1_v1i1_store(<2 x i1>* %a0,<1 x i1>* %a1) {
624; AVX512-LABEL: load_v2i1_broadcast_1_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000625; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000626; AVX512-NEXT: kmovb (%rdi), %k0
Craig Topperc5fd31a2017-12-30 06:45:43 +0000627; AVX512-NEXT: kshiftrb $1, %k0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000628; AVX512-NEXT: kmovb %k0, (%rsi)
629; AVX512-NEXT: retq
630;
631; AVX512NOTDQ-LABEL: load_v2i1_broadcast_1_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000632; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000633; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
634; AVX512NOTDQ-NEXT: kmovd %eax, %k0
635; AVX512NOTDQ-NEXT: kshiftrw $1, %k0, %k0
636; AVX512NOTDQ-NEXT: kmovd %k0, %eax
637; AVX512NOTDQ-NEXT: movb %al, (%rsi)
638; AVX512NOTDQ-NEXT: retq
639 %d0 = load <2 x i1>, <2 x i1>* %a0
640 %d1 = shufflevector <2 x i1> %d0,<2 x i1> undef,<1 x i32><i32 1>
641 store <1 x i1> %d1, <1 x i1>* %a1
642 ret void
643}
644define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
645; AVX512-LABEL: load_v3i1_broadcast_1_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000646; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000647; AVX512-NEXT: kmovb (%rdi), %k0
Craig Topperc5fd31a2017-12-30 06:45:43 +0000648; AVX512-NEXT: kshiftrb $1, %k0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000649; AVX512-NEXT: kmovb %k0, (%rsi)
650; AVX512-NEXT: retq
651;
652; AVX512NOTDQ-LABEL: load_v3i1_broadcast_1_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000653; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000654; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
655; AVX512NOTDQ-NEXT: kmovd %eax, %k0
656; AVX512NOTDQ-NEXT: kshiftrw $1, %k0, %k0
657; AVX512NOTDQ-NEXT: kmovd %k0, %eax
658; AVX512NOTDQ-NEXT: movb %al, (%rsi)
659; AVX512NOTDQ-NEXT: retq
660 %d0 = load <3 x i1>, <3 x i1>* %a0
661 %d1 = shufflevector <3 x i1> %d0,<3 x i1> undef,<1 x i32><i32 1>
662 store <1 x i1> %d1, <1 x i1>* %a1
663 ret void
664}
665define void @load_v3i1_broadcast_2_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
666; AVX512-LABEL: load_v3i1_broadcast_2_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000667; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000668; AVX512-NEXT: kmovb (%rdi), %k0
Craig Topperc5fd31a2017-12-30 06:45:43 +0000669; AVX512-NEXT: kshiftrb $2, %k0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000670; AVX512-NEXT: kmovb %k0, (%rsi)
671; AVX512-NEXT: retq
672;
673; AVX512NOTDQ-LABEL: load_v3i1_broadcast_2_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000674; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000675; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
676; AVX512NOTDQ-NEXT: kmovd %eax, %k0
677; AVX512NOTDQ-NEXT: kshiftrw $2, %k0, %k0
678; AVX512NOTDQ-NEXT: kmovd %k0, %eax
679; AVX512NOTDQ-NEXT: movb %al, (%rsi)
680; AVX512NOTDQ-NEXT: retq
681 %d0 = load <3 x i1>, <3 x i1>* %a0
682 %d1 = shufflevector <3 x i1> %d0,<3 x i1> undef,<1 x i32><i32 2>
683 store <1 x i1> %d1, <1 x i1>* %a1
684 ret void
685}
686define void @load_v4i1_broadcast_2_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
687; AVX512-LABEL: load_v4i1_broadcast_2_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000688; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000689; AVX512-NEXT: kmovb (%rdi), %k0
Craig Topperc5fd31a2017-12-30 06:45:43 +0000690; AVX512-NEXT: kshiftrb $2, %k0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000691; AVX512-NEXT: kmovb %k0, (%rsi)
692; AVX512-NEXT: retq
693;
694; AVX512NOTDQ-LABEL: load_v4i1_broadcast_2_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000695; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000696; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
697; AVX512NOTDQ-NEXT: kmovd %eax, %k0
698; AVX512NOTDQ-NEXT: kshiftrw $2, %k0, %k0
699; AVX512NOTDQ-NEXT: kmovd %k0, %eax
700; AVX512NOTDQ-NEXT: movb %al, (%rsi)
701; AVX512NOTDQ-NEXT: retq
702 %d0 = load <4 x i1>, <4 x i1>* %a0
703 %d1 = shufflevector <4 x i1> %d0,<4 x i1> undef,<1 x i32><i32 2>
704 store <1 x i1> %d1, <1 x i1>* %a1
705 ret void
706}
707define void @load_v4i1_broadcast_3_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
708; AVX512-LABEL: load_v4i1_broadcast_3_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000709; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000710; AVX512-NEXT: kmovb (%rdi), %k0
Craig Topperc5fd31a2017-12-30 06:45:43 +0000711; AVX512-NEXT: kshiftrb $3, %k0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000712; AVX512-NEXT: kmovb %k0, (%rsi)
713; AVX512-NEXT: retq
714;
715; AVX512NOTDQ-LABEL: load_v4i1_broadcast_3_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000716; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000717; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
718; AVX512NOTDQ-NEXT: kmovd %eax, %k0
719; AVX512NOTDQ-NEXT: kshiftrw $3, %k0, %k0
720; AVX512NOTDQ-NEXT: kmovd %k0, %eax
721; AVX512NOTDQ-NEXT: movb %al, (%rsi)
722; AVX512NOTDQ-NEXT: retq
723 %d0 = load <4 x i1>, <4 x i1>* %a0
724 %d1 = shufflevector <4 x i1> %d0,<4 x i1> undef,<1 x i32><i32 3>
725 store <1 x i1> %d1, <1 x i1>* %a1
726 ret void
727}
728define void @load_v8i1_broadcast_4_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
729; AVX512-LABEL: load_v8i1_broadcast_4_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000730; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000731; AVX512-NEXT: kmovb (%rdi), %k0
Craig Topperc5fd31a2017-12-30 06:45:43 +0000732; AVX512-NEXT: kshiftrb $4, %k0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000733; AVX512-NEXT: kmovb %k0, (%rsi)
734; AVX512-NEXT: retq
735;
736; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000737; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000738; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
739; AVX512NOTDQ-NEXT: kmovd %eax, %k0
740; AVX512NOTDQ-NEXT: kshiftrw $4, %k0, %k0
741; AVX512NOTDQ-NEXT: kmovd %k0, %eax
742; AVX512NOTDQ-NEXT: movb %al, (%rsi)
743; AVX512NOTDQ-NEXT: retq
744 %d0 = load <8 x i1>, <8 x i1>* %a0
745 %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<1 x i32><i32 4>
746 store <1 x i1> %d1, <1 x i1>* %a1
747 ret void
748}
749define void @load_v8i1_broadcast_4_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
750; AVX512-LABEL: load_v8i1_broadcast_4_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000751; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000752; AVX512-NEXT: kmovb (%rdi), %k0
Craig Topperc5fd31a2017-12-30 06:45:43 +0000753; AVX512-NEXT: kshiftrb $4, %k0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000754; AVX512-NEXT: vpmovm2q %k0, %xmm0
755; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
756; AVX512-NEXT: vpmovq2m %xmm0, %k0
757; AVX512-NEXT: kmovb %k0, (%rsi)
758; AVX512-NEXT: retq
759;
760; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000761; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000762; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
763; AVX512NOTDQ-NEXT: kmovd %eax, %k0
764; AVX512NOTDQ-NEXT: kshiftrw $4, %k0, %k1
765; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
766; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
767; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
768; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
769; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
770; AVX512NOTDQ-NEXT: kmovd %k0, %eax
771; AVX512NOTDQ-NEXT: movb %al, (%rsi)
772; AVX512NOTDQ-NEXT: retq
773 %d0 = load <8 x i1>, <8 x i1>* %a0
774 %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<2 x i32><i32 4,i32 4>
775 store <2 x i1> %d1, <2 x i1>* %a1
776 ret void
777}
778define void @load_v8i1_broadcast_7_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
779; AVX512-LABEL: load_v8i1_broadcast_7_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000780; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000781; AVX512-NEXT: kmovb (%rdi), %k0
Craig Topperc5fd31a2017-12-30 06:45:43 +0000782; AVX512-NEXT: kshiftrb $7, %k0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000783; AVX512-NEXT: kmovb %k0, (%rsi)
784; AVX512-NEXT: retq
785;
786; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000787; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000788; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
789; AVX512NOTDQ-NEXT: kmovd %eax, %k0
790; AVX512NOTDQ-NEXT: kshiftrw $7, %k0, %k0
791; AVX512NOTDQ-NEXT: kmovd %k0, %eax
792; AVX512NOTDQ-NEXT: movb %al, (%rsi)
793; AVX512NOTDQ-NEXT: retq
794 %d0 = load <8 x i1>, <8 x i1>* %a0
795 %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<1 x i32><i32 7>
796 store <1 x i1> %d1, <1 x i1>* %a1
797 ret void
798}
799define void @load_v8i1_broadcast_7_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
800; AVX512-LABEL: load_v8i1_broadcast_7_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000801; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000802; AVX512-NEXT: kmovb (%rdi), %k0
Craig Topperc5fd31a2017-12-30 06:45:43 +0000803; AVX512-NEXT: kshiftrb $6, %k0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000804; AVX512-NEXT: vpmovm2q %k0, %xmm0
805; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
806; AVX512-NEXT: vpmovq2m %xmm0, %k0
807; AVX512-NEXT: kmovb %k0, (%rsi)
808; AVX512-NEXT: retq
809;
810; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000811; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000812; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
813; AVX512NOTDQ-NEXT: kmovd %eax, %k0
814; AVX512NOTDQ-NEXT: kshiftrw $6, %k0, %k1
815; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
816; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
817; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
818; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
819; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
820; AVX512NOTDQ-NEXT: kmovd %k0, %eax
821; AVX512NOTDQ-NEXT: movb %al, (%rsi)
822; AVX512NOTDQ-NEXT: retq
823 %d0 = load <8 x i1>, <8 x i1>* %a0
824 %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<2 x i32><i32 7,i32 7>
825 store <2 x i1> %d1, <2 x i1>* %a1
826 ret void
827}
828define void @load_v16i1_broadcast_8_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
829; AVX512-LABEL: load_v16i1_broadcast_8_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000830; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000831; AVX512-NEXT: kmovw (%rdi), %k0
832; AVX512-NEXT: kshiftrw $8, %k0, %k0
833; AVX512-NEXT: kmovb %k0, (%rsi)
834; AVX512-NEXT: retq
835;
836; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000837; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000838; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
839; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k0
840; AVX512NOTDQ-NEXT: kmovd %k0, %eax
841; AVX512NOTDQ-NEXT: movb %al, (%rsi)
842; AVX512NOTDQ-NEXT: retq
843 %d0 = load <16 x i1>, <16 x i1>* %a0
844 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<1 x i32><i32 8>
845 store <1 x i1> %d1, <1 x i1>* %a1
846 ret void
847}
848define void @load_v16i1_broadcast_8_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
849; AVX512-LABEL: load_v16i1_broadcast_8_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000850; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000851; AVX512-NEXT: kmovw (%rdi), %k0
852; AVX512-NEXT: kshiftrw $8, %k0, %k0
853; AVX512-NEXT: vpmovm2q %k0, %xmm0
854; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
855; AVX512-NEXT: vpmovq2m %xmm0, %k0
856; AVX512-NEXT: kmovb %k0, (%rsi)
857; AVX512-NEXT: retq
858;
859; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000860; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000861; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
862; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k1
863; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
864; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
865; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
866; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
867; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
868; AVX512NOTDQ-NEXT: kmovd %k0, %eax
869; AVX512NOTDQ-NEXT: movb %al, (%rsi)
870; AVX512NOTDQ-NEXT: retq
871 %d0 = load <16 x i1>, <16 x i1>* %a0
872 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<2 x i32><i32 8,i32 8>
873 store <2 x i1> %d1, <2 x i1>* %a1
874 ret void
875}
876define void @load_v16i1_broadcast_8_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
877; AVX512-LABEL: load_v16i1_broadcast_8_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000878; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000879; AVX512-NEXT: kmovw (%rdi), %k0
880; AVX512-NEXT: kshiftrw $8, %k0, %k0
881; AVX512-NEXT: vpmovm2d %k0, %xmm0
882; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
883; AVX512-NEXT: vpmovd2m %xmm0, %k0
884; AVX512-NEXT: kmovb %k0, (%rsi)
885; AVX512-NEXT: retq
886;
887; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000888; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000889; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
890; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k1
891; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
892; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
893; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
894; AVX512NOTDQ-NEXT: vpslld $31, %xmm0, %xmm0
895; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
896; AVX512NOTDQ-NEXT: kmovd %k0, %eax
897; AVX512NOTDQ-NEXT: movb %al, (%rsi)
898; AVX512NOTDQ-NEXT: retq
899 %d0 = load <16 x i1>, <16 x i1>* %a0
900 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<4 x i32><i32 8,i32 8,i32 8,i32 8>
901 store <4 x i1> %d1, <4 x i1>* %a1
902 ret void
903}
904define void @load_v16i1_broadcast_15_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
905; AVX512-LABEL: load_v16i1_broadcast_15_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000906; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000907; AVX512-NEXT: kmovw (%rdi), %k0
908; AVX512-NEXT: kshiftrw $15, %k0, %k0
909; AVX512-NEXT: kmovb %k0, (%rsi)
910; AVX512-NEXT: retq
911;
912; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000913; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000914; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
915; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
916; AVX512NOTDQ-NEXT: kmovd %k0, %eax
917; AVX512NOTDQ-NEXT: movb %al, (%rsi)
918; AVX512NOTDQ-NEXT: retq
919 %d0 = load <16 x i1>, <16 x i1>* %a0
920 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<1 x i32><i32 15>
921 store <1 x i1> %d1, <1 x i1>* %a1
922 ret void
923}
924define void @load_v16i1_broadcast_15_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
925; AVX512-LABEL: load_v16i1_broadcast_15_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000926; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000927; AVX512-NEXT: kmovw (%rdi), %k0
928; AVX512-NEXT: kshiftrw $14, %k0, %k0
929; AVX512-NEXT: vpmovm2q %k0, %xmm0
930; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
931; AVX512-NEXT: vpmovq2m %xmm0, %k0
932; AVX512-NEXT: kmovb %k0, (%rsi)
933; AVX512-NEXT: retq
934;
935; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000936; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000937; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
938; AVX512NOTDQ-NEXT: kshiftrw $14, %k0, %k1
939; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
940; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
941; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
942; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
943; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
944; AVX512NOTDQ-NEXT: kmovd %k0, %eax
945; AVX512NOTDQ-NEXT: movb %al, (%rsi)
946; AVX512NOTDQ-NEXT: retq
947 %d0 = load <16 x i1>, <16 x i1>* %a0
948 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<2 x i32><i32 15,i32 15>
949 store <2 x i1> %d1, <2 x i1>* %a1
950 ret void
951}
952define void @load_v16i1_broadcast_15_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
953; AVX512-LABEL: load_v16i1_broadcast_15_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000954; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000955; AVX512-NEXT: kmovw (%rdi), %k0
956; AVX512-NEXT: kshiftrw $12, %k0, %k0
957; AVX512-NEXT: vpmovm2d %k0, %xmm0
958; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
959; AVX512-NEXT: vpmovd2m %xmm0, %k0
960; AVX512-NEXT: kmovb %k0, (%rsi)
961; AVX512-NEXT: retq
962;
963; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000964; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000965; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
966; AVX512NOTDQ-NEXT: kshiftrw $12, %k0, %k1
967; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
968; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
969; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
970; AVX512NOTDQ-NEXT: vpslld $31, %xmm0, %xmm0
971; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
972; AVX512NOTDQ-NEXT: kmovd %k0, %eax
973; AVX512NOTDQ-NEXT: movb %al, (%rsi)
974; AVX512NOTDQ-NEXT: retq
975 %d0 = load <16 x i1>, <16 x i1>* %a0
976 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<4 x i32><i32 15,i32 15,i32 15,i32 15>
977 store <4 x i1> %d1, <4 x i1>* %a1
978 ret void
979}
980define void @load_v32i1_broadcast_16_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
981; AVX512-LABEL: load_v32i1_broadcast_16_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000982; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000983; AVX512-NEXT: kmovd (%rdi), %k0
984; AVX512-NEXT: kshiftrd $16, %k0, %k0
985; AVX512-NEXT: kmovb %k0, (%rsi)
986; AVX512-NEXT: retq
987;
988; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000989; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000990; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
991; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k0
992; AVX512NOTDQ-NEXT: kmovd %k0, %eax
993; AVX512NOTDQ-NEXT: movb %al, (%rsi)
994; AVX512NOTDQ-NEXT: retq
995 %d0 = load <32 x i1>, <32 x i1>* %a0
996 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<1 x i32><i32 16>
997 store <1 x i1> %d1, <1 x i1>* %a1
998 ret void
999}
1000define void @load_v32i1_broadcast_16_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
1001; AVX512-LABEL: load_v32i1_broadcast_16_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001002; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001003; AVX512-NEXT: kmovd (%rdi), %k0
1004; AVX512-NEXT: kshiftrd $16, %k0, %k0
1005; AVX512-NEXT: vpmovm2q %k0, %xmm0
1006; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
1007; AVX512-NEXT: vpmovq2m %xmm0, %k0
1008; AVX512-NEXT: kmovb %k0, (%rsi)
1009; AVX512-NEXT: retq
1010;
1011; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001012; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001013; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
1014; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
1015; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1016; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
1017; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
1018; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
1019; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
1020; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1021; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1022; AVX512NOTDQ-NEXT: retq
1023 %d0 = load <32 x i1>, <32 x i1>* %a0
1024 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<2 x i32><i32 16,i32 16>
1025 store <2 x i1> %d1, <2 x i1>* %a1
1026 ret void
1027}
1028define void @load_v32i1_broadcast_16_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
1029; AVX512-LABEL: load_v32i1_broadcast_16_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001030; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001031; AVX512-NEXT: kmovd (%rdi), %k0
1032; AVX512-NEXT: kshiftrd $16, %k0, %k0
1033; AVX512-NEXT: vpmovm2d %k0, %xmm0
1034; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
1035; AVX512-NEXT: vpmovd2m %xmm0, %k0
1036; AVX512-NEXT: kmovb %k0, (%rsi)
1037; AVX512-NEXT: retq
1038;
1039; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001040; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001041; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
1042; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
1043; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1044; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1045; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
1046; AVX512NOTDQ-NEXT: vpslld $31, %xmm0, %xmm0
1047; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
1048; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1049; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1050; AVX512NOTDQ-NEXT: retq
1051 %d0 = load <32 x i1>, <32 x i1>* %a0
1052 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<4 x i32><i32 16,i32 16,i32 16,i32 16>
1053 store <4 x i1> %d1, <4 x i1>* %a1
1054 ret void
1055}
1056define void @load_v32i1_broadcast_16_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
1057; AVX512-LABEL: load_v32i1_broadcast_16_v8i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001058; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001059; AVX512-NEXT: kmovd (%rdi), %k0
1060; AVX512-NEXT: kshiftrd $16, %k0, %k0
Craig Topper410a2892017-12-21 18:44:06 +00001061; AVX512-NEXT: vpmovm2d %k0, %ymm0
1062; AVX512-NEXT: vpbroadcastd %xmm0, %ymm0
1063; AVX512-NEXT: vpmovd2m %ymm0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001064; AVX512-NEXT: kmovb %k0, (%rsi)
1065; AVX512-NEXT: vzeroupper
1066; AVX512-NEXT: retq
1067;
1068; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001069; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001070; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
1071; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
Craig Topper410a2892017-12-21 18:44:06 +00001072; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
1073; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
1074; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
1075; AVX512NOTDQ-NEXT: vpslld $31, %ymm0, %ymm0
1076; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001077; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1078; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1079; AVX512NOTDQ-NEXT: vzeroupper
1080; AVX512NOTDQ-NEXT: retq
1081 %d0 = load <32 x i1>, <32 x i1>* %a0
1082 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<8 x i32><i32 16,i32 16,i32 16,i32 16,i32 16,i32 16,i32 16,i32 16>
1083 store <8 x i1> %d1, <8 x i1>* %a1
1084 ret void
1085}
1086define void @load_v32i1_broadcast_31_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
1087; AVX512-LABEL: load_v32i1_broadcast_31_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001088; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001089; AVX512-NEXT: kmovd (%rdi), %k0
1090; AVX512-NEXT: kshiftrd $31, %k0, %k0
1091; AVX512-NEXT: kmovb %k0, (%rsi)
1092; AVX512-NEXT: retq
1093;
1094; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001095; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001096; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
1097; AVX512NOTDQ-NEXT: kshiftrd $31, %k0, %k0
1098; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1099; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1100; AVX512NOTDQ-NEXT: retq
1101 %d0 = load <32 x i1>, <32 x i1>* %a0
1102 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<1 x i32><i32 31>
1103 store <1 x i1> %d1, <1 x i1>* %a1
1104 ret void
1105}
1106define void @load_v32i1_broadcast_31_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
1107; AVX512-LABEL: load_v32i1_broadcast_31_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001108; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001109; AVX512-NEXT: kmovd (%rdi), %k0
1110; AVX512-NEXT: kshiftrd $30, %k0, %k0
1111; AVX512-NEXT: vpmovm2q %k0, %xmm0
1112; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1113; AVX512-NEXT: vpmovq2m %xmm0, %k0
1114; AVX512-NEXT: kmovb %k0, (%rsi)
1115; AVX512-NEXT: retq
1116;
1117; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001118; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001119; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
1120; AVX512NOTDQ-NEXT: kshiftrd $30, %k0, %k1
1121; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1122; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
1123; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1124; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
1125; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
1126; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1127; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1128; AVX512NOTDQ-NEXT: retq
1129 %d0 = load <32 x i1>, <32 x i1>* %a0
1130 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<2 x i32><i32 31,i32 31>
1131 store <2 x i1> %d1, <2 x i1>* %a1
1132 ret void
1133}
1134define void @load_v32i1_broadcast_31_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
1135; AVX512-LABEL: load_v32i1_broadcast_31_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001136; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001137; AVX512-NEXT: kmovd (%rdi), %k0
1138; AVX512-NEXT: kshiftrd $28, %k0, %k0
1139; AVX512-NEXT: vpmovm2d %k0, %xmm0
1140; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
1141; AVX512-NEXT: vpmovd2m %xmm0, %k0
1142; AVX512-NEXT: kmovb %k0, (%rsi)
1143; AVX512-NEXT: retq
1144;
1145; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001146; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001147; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
1148; AVX512NOTDQ-NEXT: kshiftrd $28, %k0, %k1
1149; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1150; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1151; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
1152; AVX512NOTDQ-NEXT: vpslld $31, %xmm0, %xmm0
1153; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
1154; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1155; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1156; AVX512NOTDQ-NEXT: retq
1157 %d0 = load <32 x i1>, <32 x i1>* %a0
1158 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<4 x i32><i32 31,i32 31,i32 31,i32 31>
1159 store <4 x i1> %d1, <4 x i1>* %a1
1160 ret void
1161}
1162define void @load_v32i1_broadcast_31_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
1163; AVX512-LABEL: load_v32i1_broadcast_31_v8i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001164; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001165; AVX512-NEXT: kmovd (%rdi), %k0
1166; AVX512-NEXT: kshiftrd $24, %k0, %k0
Craig Topper410a2892017-12-21 18:44:06 +00001167; AVX512-NEXT: vpmovm2d %k0, %ymm0
1168; AVX512-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
1169; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
1170; AVX512-NEXT: vpmovd2m %ymm0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001171; AVX512-NEXT: kmovb %k0, (%rsi)
1172; AVX512-NEXT: vzeroupper
1173; AVX512-NEXT: retq
1174;
1175; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001176; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001177; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
1178; AVX512NOTDQ-NEXT: kshiftrd $24, %k0, %k1
Craig Topper410a2892017-12-21 18:44:06 +00001179; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
1180; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
1181; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
1182; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
1183; AVX512NOTDQ-NEXT: vpslld $31, %ymm0, %ymm0
1184; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001185; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1186; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1187; AVX512NOTDQ-NEXT: vzeroupper
1188; AVX512NOTDQ-NEXT: retq
1189 %d0 = load <32 x i1>, <32 x i1>* %a0
1190 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<8 x i32><i32 31,i32 31,i32 31,i32 31,i32 31,i32 31,i32 31,i32 31>
1191 store <8 x i1> %d1, <8 x i1>* %a1
1192 ret void
1193}
1194define void @load_v64i1_broadcast_32_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
1195; AVX512-LABEL: load_v64i1_broadcast_32_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001196; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001197; AVX512-NEXT: kmovq (%rdi), %k0
1198; AVX512-NEXT: kshiftrq $32, %k0, %k0
1199; AVX512-NEXT: kmovb %k0, (%rsi)
1200; AVX512-NEXT: retq
1201;
1202; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001203; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001204; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
1205; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k0
1206; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1207; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1208; AVX512NOTDQ-NEXT: retq
1209 %d0 = load <64 x i1>, <64 x i1>* %a0
1210 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<1 x i32><i32 32>
1211 store <1 x i1> %d1, <1 x i1>* %a1
1212 ret void
1213}
1214define void @load_v64i1_broadcast_32_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
1215; AVX512-LABEL: load_v64i1_broadcast_32_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001216; AVX512: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001217; AVX512-NEXT: kmovq (%rdi), %k0
1218; AVX512-NEXT: kshiftrq $32, %k0, %k0
1219; AVX512-NEXT: vpmovm2q %k0, %xmm0
1220; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
1221; AVX512-NEXT: vpmovq2m %xmm0, %k0
1222; AVX512-NEXT: kmovb %k0, (%rsi)
1223; AVX512-NEXT: retq
1224;
1225; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001226; AVX512NOTDQ: # %bb.0:
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001227; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
1228; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
1229; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1230; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
1231; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
1232; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
1233; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
1234; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1235; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1236; AVX512NOTDQ-NEXT: retq
1237 %d0 = load <64 x i1>, <64 x i1>* %a0
1238 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<2 x i32><i32 32,i32 32>
1239 store <2 x i1> %d1, <2 x i1>* %a1
1240 ret void
1241}
; Load a <64 x i1> mask, splat bit 32 into a <4 x i1> vector, and store it.
; Same shape as the v2i1 case but widened to dword elements: the mask
; round-trips through xmm via vpmovm2d/vpbroadcastd/vpmovd2m (DQ) or the
; zero-masked vmovdqa32 + vpslld/vptestmd sequence (non-DQ).
; Assertions are autogenerated by utils/update_llc_test_checks.py.
define void @load_v64i1_broadcast_32_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_32_v4i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $32, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX512-NEXT: vpmovd2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v4i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
 %d0 = load <64 x i1>, <64 x i1>* %a0
 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<4 x i32><i32 32,i32 32,i32 32,i32 32>
 store <4 x i1> %d1, <4 x i1>* %a1
 ret void
}
; Load a <64 x i1> mask, splat bit 32 into an <8 x i1> vector, and store it.
; Eight dword lanes need a ymm round-trip, so both lowerings end with
; vzeroupper before returning to the caller.
; Assertions are autogenerated by utils/update_llc_test_checks.py.
define void @load_v64i1_broadcast_32_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_32_v8i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $32, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %ymm0
; AVX512-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX512-NEXT: vpmovd2m %ymm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX512NOTDQ-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
; AVX512NOTDQ-NEXT: retq
 %d0 = load <64 x i1>, <64 x i1>* %a0
 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<8 x i32><i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32>
 store <8 x i1> %d1, <8 x i1>* %a1
 ret void
}
; Load a <64 x i1> mask, splat bit 32 into a <16 x i1> vector, and store it.
; Sixteen dword lanes use zmm; the non-DQ path builds the all-ones vector with
; a masked vpternlogd instead of vpcmpeqd+vmovdqa32, and the 16-bit result is
; stored directly with kmovw (no GPR detour needed, unlike the narrower cases).
; Assertions are autogenerated by utils/update_llc_test_checks.py.
define void @load_v64i1_broadcast_32_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_32_v16i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $32, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %zmm0
; AVX512-NEXT: vpbroadcastd %xmm0, %zmm0
; AVX512-NEXT: vpmovd2m %zmm0, %k0
; AVX512-NEXT: kmovw %k0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v16i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %zmm0
; AVX512NOTDQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512NOTDQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512NOTDQ-NEXT: kmovw %k0, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
; AVX512NOTDQ-NEXT: retq
 %d0 = load <64 x i1>, <64 x i1>* %a0
 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<16 x i32><i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32>
 store <16 x i1> %d1, <16 x i1>* %a1
 ret void
}
; Extract bit 63 (the top bit) of a loaded <64 x i1> mask as a <1 x i1> and
; store it. A single element needs no vector round-trip: just kmovq + kshiftrq,
; then kmovb (DQ) or kmovd + movb (non-DQ) to store one byte.
; Assertions are autogenerated by utils/update_llc_test_checks.py.
define void @load_v64i1_broadcast_63_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_63_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $63, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $63, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
 %d0 = load <64 x i1>, <64 x i1>* %a0
 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<1 x i32><i32 63>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
; Load a <64 x i1> mask, splat bit 63 into a <2 x i1> vector, and store it.
; The shift is by 62 (not 63), leaving bit 63 in element 1 of the 2-element
; mask; vpshufd with pattern [2,3,2,3] then duplicates that upper qword into
; both lanes before converting back to a mask.
; Assertions are autogenerated by utils/update_llc_test_checks.py.
define void @load_v64i1_broadcast_63_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_63_v2i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $62, %k0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512-NEXT: vpmovq2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v2i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $62, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
 %d0 = load <64 x i1>, <64 x i1>* %a0
 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<2 x i32><i32 63,i32 63>
 store <2 x i1> %d1, <2 x i1>* %a1
 ret void
}
; Load a <64 x i1> mask, splat bit 63 into a <4 x i1> vector, and store it.
; The shift is by 60, leaving bit 63 in element 3 of the 4-element mask;
; vpshufd [3,3,3,3] then splats that top dword lane across the vector.
; Assertions are autogenerated by utils/update_llc_test_checks.py.
define void @load_v64i1_broadcast_63_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_63_v4i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $60, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512-NEXT: vpmovd2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v4i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $60, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512NOTDQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
 %d0 = load <64 x i1>, <64 x i1>* %a0
 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<4 x i32><i32 63,i32 63,i32 63,i32 63>
 store <4 x i1> %d1, <4 x i1>* %a1
 ret void
}
; Load a <64 x i1> mask, splat bit 63 into an <8 x i1> vector, and store it.
; The shift is by 56, leaving bit 63 in element 7 of the 8-element mask;
; splatting the top dword of a ymm takes two shuffles (in-lane vpshufd plus
; cross-lane vpermq), and both paths end with vzeroupper.
; Assertions are autogenerated by utils/update_llc_test_checks.py.
define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_63_v8i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $56, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512-NEXT: vpmovd2m %ymm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $56, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512NOTDQ-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
; AVX512NOTDQ-NEXT: retq
 %d0 = load <64 x i1>, <64 x i1>* %a0
 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<8 x i32><i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63>
 store <8 x i1> %d1, <8 x i1>* %a1
 ret void
}
; Load a <64 x i1> mask, splat bit 63 into a <16 x i1> vector, and store it.
; The shift is by 48, leaving bit 63 in element 15 of the 16-element mask;
; splatting lane 15 of a zmm needs a full vpermd with an all-15 index vector.
; The 16-bit result is stored directly with kmovw on both paths.
; Assertions are autogenerated by utils/update_llc_test_checks.py.
define void @load_v64i1_broadcast_63_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_63_v16i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $48, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %zmm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: vpmovd2m %zmm0, %k0
; AVX512-NEXT: kmovw %k0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v16i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $48, %k0, %k1
; AVX512NOTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512NOTDQ-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512NOTDQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512NOTDQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512NOTDQ-NEXT: kmovw %k0, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
; AVX512NOTDQ-NEXT: retq
 %d0 = load <64 x i1>, <64 x i1>* %a0
 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<16 x i32><i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63>
 store <16 x i1> %d1, <16 x i1>* %a1
 ret void
}
1466