; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq,+fast-variable-shuffle -O2 | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+fast-variable-shuffle -O2 | FileCheck %s --check-prefix=AVX512NOTDQ
; Broadcast bit 4 of a loaded <8 x i1> mask into a <2 x i1> select mask.
define void @load_v8i1_broadcast_4_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v8i1_broadcast_4_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb (%rdi), %k0
; AVX512-NEXT:    kshiftrb $4, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT:    kmovd %eax, %k0
; AVX512NOTDQ-NEXT:    kshiftrw $4, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <8 x i1>, <8 x i1>* %a0
    %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<2 x i32><i32 4,i32 4>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
; Broadcast bit 7 of a loaded <8 x i1> mask into a <2 x i1> select mask.
define void @load_v8i1_broadcast_7_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v8i1_broadcast_7_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb (%rdi), %k0
; AVX512-NEXT:    kshiftrb $6, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT:    kmovd %eax, %k0
; AVX512NOTDQ-NEXT:    kshiftrw $6, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <8 x i1>, <8 x i1>* %a0
    %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<2 x i32><i32 7,i32 7>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
; Broadcast bit 8 of a loaded <16 x i1> mask into a <2 x i1> select mask.
define void @load_v16i1_broadcast_8_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v16i1_broadcast_8_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovw (%rdi), %k0
; AVX512-NEXT:    kshiftrw $8, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrw $8, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <16 x i1>, <16 x i1>* %a0
    %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<2 x i32><i32 8,i32 8>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
; Broadcast bit 8 of a loaded <16 x i1> mask into a <4 x i1> select mask.
define void @load_v16i1_broadcast_8_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v16i1_broadcast_8_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovw (%rdi), %k0
; AVX512-NEXT:    kshiftrw $8, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrw $8, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <16 x i1>, <16 x i1>* %a0
    %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<4 x i32><i32 8,i32 8,i32 8,i32 8>
    %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
    store <4 x float> %d2, <4 x float>* %a3
    ret void
}
; Broadcast bit 15 of a loaded <16 x i1> mask into a <2 x i1> select mask.
define void @load_v16i1_broadcast_15_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v16i1_broadcast_15_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovw (%rdi), %k0
; AVX512-NEXT:    kshiftrw $14, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrw $14, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <16 x i1>, <16 x i1>* %a0
    %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<2 x i32><i32 15,i32 15>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
; Broadcast bit 15 of a loaded <16 x i1> mask into a <4 x i1> select mask.
define void @load_v16i1_broadcast_15_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v16i1_broadcast_15_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovw (%rdi), %k0
; AVX512-NEXT:    kshiftrw $12, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrw $12, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <16 x i1>, <16 x i1>* %a0
    %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<4 x i32><i32 15,i32 15,i32 15,i32 15>
    %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
    store <4 x float> %d2, <4 x float>* %a3
    ret void
}
; Broadcast bit 16 of a loaded <32 x i1> mask into a <2 x i1> select mask.
define void @load_v32i1_broadcast_16_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_16_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd (%rdi), %k0
; AVX512-NEXT:    kshiftrd $16, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrd $16, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <32 x i1>, <32 x i1>* %a0
    %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<2 x i32><i32 16,i32 16>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
; Broadcast bit 16 of a loaded <32 x i1> mask into a <4 x i1> select mask.
define void @load_v32i1_broadcast_16_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_16_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd (%rdi), %k0
; AVX512-NEXT:    kshiftrd $16, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrd $16, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <32 x i1>, <32 x i1>* %a0
    %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<4 x i32><i32 16,i32 16,i32 16,i32 16>
    %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
    store <4 x float> %d2, <4 x float>* %a3
    ret void
}
; Broadcast bit 16 of a loaded <32 x i1> mask into an <8 x i1> select mask (YMM path).
define void @load_v32i1_broadcast_16_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_16_v8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd (%rdi), %k0
; AVX512-NEXT:    kshiftrd $16, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512-NEXT:    vpmovd2m %ymm2, %k1
; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrd $16, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <32 x i1>, <32 x i1>* %a0
    %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<8 x i32><i32 16,i32 16,i32 16,i32 16,i32 16,i32 16,i32 16,i32 16>
    %d2 = select <8 x i1> %d1, <8 x float> %a1, <8 x float> %a2
    store <8 x float> %d2, <8 x float>* %a3
    ret void
}
; Broadcast bit 31 of a loaded <32 x i1> mask into a <2 x i1> select mask.
define void @load_v32i1_broadcast_31_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_31_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd (%rdi), %k0
; AVX512-NEXT:    kshiftrd $30, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrd $30, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <32 x i1>, <32 x i1>* %a0
    %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<2 x i32><i32 31,i32 31>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
; Broadcast bit 31 of a loaded <32 x i1> mask into a <4 x i1> select mask.
define void @load_v32i1_broadcast_31_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_31_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd (%rdi), %k0
; AVX512-NEXT:    kshiftrd $28, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrd $28, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <32 x i1>, <32 x i1>* %a0
    %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<4 x i32><i32 31,i32 31,i32 31,i32 31>
    %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
    store <4 x float> %d2, <4 x float>* %a3
    ret void
}
; Broadcast bit 31 of a loaded <32 x i1> mask into an <8 x i1> select mask (uses vpermd with an all-7s index).
define void @load_v32i1_broadcast_31_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_31_v8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd (%rdi), %k0
; AVX512-NEXT:    kshiftrd $24, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm2
; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
; AVX512-NEXT:    vpermd %ymm2, %ymm3, %ymm2
; AVX512-NEXT:    vpmovd2m %ymm2, %k1
; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrd $24, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
; AVX512NOTDQ-NEXT:    vpermd %ymm2, %ymm3, %ymm2
; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <32 x i1>, <32 x i1>* %a0
    %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<8 x i32><i32 31,i32 31,i32 31,i32 31,i32 31,i32 31,i32 31,i32 31>
    %d2 = select <8 x i1> %d1, <8 x float> %a1, <8 x float> %a2
    store <8 x float> %d2, <8 x float>* %a3
    ret void
}
; Broadcast bit 32 of a loaded <64 x i1> mask into a <2 x i1> select mask.
define void @load_v64i1_broadcast_32_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_32_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $32, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<2 x i32><i32 32,i32 32>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
; Broadcast bit 32 of a loaded <64 x i1> mask into a <4 x i1> select mask.
define void @load_v64i1_broadcast_32_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_32_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $32, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<4 x i32><i32 32,i32 32,i32 32,i32 32>
    %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
    store <4 x float> %d2, <4 x float>* %a3
    ret void
}
; Broadcast bit 32 of a loaded <64 x i1> mask into an <8 x i1> select mask (YMM path).
define void @load_v64i1_broadcast_32_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_32_v8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $32, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512-NEXT:    vpmovd2m %ymm2, %k1
; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<8 x i32><i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32>
    %d2 = select <8 x i1> %d1, <8 x float> %a1, <8 x float> %a2
    store <8 x float> %d2, <8 x float>* %a3
    ret void
}
; Broadcast bit 32 of a loaded <64 x i1> mask into a <16 x i1> select mask (ZMM path).
define void @load_v64i1_broadcast_32_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x float> %a2,<16 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_32_v16i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $32, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %zmm2
; AVX512-NEXT:    vpmovd2m %zmm2, %k1
; AVX512-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; AVX512-NEXT:    vmovaps %zmm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v16i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %zmm2
; AVX512NOTDQ-NEXT:    vptestmd %zmm2, %zmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %zmm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<16 x i32><i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32>
    %d2 = select <16 x i1> %d1, <16 x float> %a1, <16 x float> %a2
    store <16 x float> %d2, <16 x float>* %a3
    ret void
}
; Broadcast bit 63 of a loaded <64 x i1> mask into a <2 x i1> select mask.
define void @load_v64i1_broadcast_63_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_63_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $62, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $62, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<2 x i32><i32 63,i32 63>
    %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
    store <2 x double> %d2, <2 x double>* %a3
    ret void
}
; Broadcast bit 63 of a loaded <64 x i1> mask into a <4 x i1> select mask.
define void @load_v64i1_broadcast_63_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_63_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $60, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $60, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<4 x i32><i32 63,i32 63,i32 63,i32 63>
    %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
    store <4 x float> %d2, <4 x float>* %a3
    ret void
}
; Broadcast bit 63 of a loaded <64 x i1> mask into an <8 x i1> select mask (uses vpermd with an all-7s index).
define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_63_v8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $56, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm2
; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
; AVX512-NEXT:    vpermd %ymm2, %ymm3, %ymm2
; AVX512-NEXT:    vpmovd2m %ymm2, %k1
; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $56, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
; AVX512NOTDQ-NEXT:    vpermd %ymm2, %ymm3, %ymm2
; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<8 x i32><i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63>
    %d2 = select <8 x i1> %d1, <8 x float> %a1, <8 x float> %a2
    store <8 x float> %d2, <8 x float>* %a3
    ret void
}
; Broadcast bit 63 of a loaded <64 x i1> mask into a <16 x i1> select mask (uses vpermd with an all-15s index).
define void @load_v64i1_broadcast_63_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x float> %a2,<16 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_63_v16i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $48, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm2
; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512-NEXT:    vpermd %zmm2, %zmm3, %zmm2
; AVX512-NEXT:    vpmovd2m %zmm2, %k1
; AVX512-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; AVX512-NEXT:    vmovaps %zmm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v16i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $48, %k0, %k1
; AVX512NOTDQ-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512NOTDQ-NEXT:    vpermd %zmm2, %zmm3, %zmm2
; AVX512NOTDQ-NEXT:    vptestmd %zmm2, %zmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %zmm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<16 x i32><i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63>
    %d2 = select <16 x i1> %d1, <16 x float> %a1, <16 x float> %a2
    store <16 x float> %d2, <16 x float>* %a3
    ret void
}
define void @load_v2i1_broadcast_1_v1i1_store(<2 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v2i1_broadcast_1_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
; AVX512-NEXT: kshiftrb $1, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v2i1_broadcast_1_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
; AVX512NOTDQ-NEXT: kshiftrw $1, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Extract bit 1 of a loaded <2 x i1> mask and store it as <1 x i1> to %a1.
 %d0 = load <2 x i1>, <2 x i1>* %a0
 %d1 = shufflevector <2 x i1> %d0,<2 x i1> undef,<1 x i32><i32 1>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v3i1_broadcast_1_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
; AVX512-NEXT: kshiftrb $1, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v3i1_broadcast_1_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
; AVX512NOTDQ-NEXT: kshiftrw $1, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Extract bit 1 of a loaded <3 x i1> mask and store it as <1 x i1> to %a1.
 %d0 = load <3 x i1>, <3 x i1>* %a0
 %d1 = shufflevector <3 x i1> %d0,<3 x i1> undef,<1 x i32><i32 1>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
define void @load_v3i1_broadcast_2_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v3i1_broadcast_2_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
; AVX512-NEXT: kshiftrb $2, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v3i1_broadcast_2_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
; AVX512NOTDQ-NEXT: kshiftrw $2, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Extract bit 2 (the top element) of a loaded <3 x i1> mask and store it as
; <1 x i1> to %a1.
 %d0 = load <3 x i1>, <3 x i1>* %a0
 %d1 = shufflevector <3 x i1> %d0,<3 x i1> undef,<1 x i32><i32 2>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
define void @load_v4i1_broadcast_2_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v4i1_broadcast_2_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
; AVX512-NEXT: kshiftrb $2, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v4i1_broadcast_2_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
; AVX512NOTDQ-NEXT: kshiftrw $2, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Extract bit 2 of a loaded <4 x i1> mask and store it as <1 x i1> to %a1.
 %d0 = load <4 x i1>, <4 x i1>* %a0
 %d1 = shufflevector <4 x i1> %d0,<4 x i1> undef,<1 x i32><i32 2>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
define void @load_v4i1_broadcast_3_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v4i1_broadcast_3_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
; AVX512-NEXT: kshiftrb $3, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v4i1_broadcast_3_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
; AVX512NOTDQ-NEXT: kshiftrw $3, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Extract bit 3 (the top element) of a loaded <4 x i1> mask and store it as
; <1 x i1> to %a1.
 %d0 = load <4 x i1>, <4 x i1>* %a0
 %d1 = shufflevector <4 x i1> %d0,<4 x i1> undef,<1 x i32><i32 3>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
define void @load_v8i1_broadcast_4_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v8i1_broadcast_4_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
; AVX512-NEXT: kshiftrb $4, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
; AVX512NOTDQ-NEXT: kshiftrw $4, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Extract bit 4 of a loaded <8 x i1> mask and store it as <1 x i1> to %a1.
 %d0 = load <8 x i1>, <8 x i1>* %a0
 %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<1 x i32><i32 4>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
define void @load_v8i1_broadcast_4_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
; AVX512-LABEL: load_v8i1_broadcast_4_v2i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
; AVX512-NEXT: kshiftrb $4, %k0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm0
; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512-NEXT: vpmovq2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v2i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
; AVX512NOTDQ-NEXT: kshiftrw $4, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 4 of a loaded <8 x i1> mask to both lanes of a <2 x i1> and
; store it to %a1.
 %d0 = load <8 x i1>, <8 x i1>* %a0
 %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<2 x i32><i32 4,i32 4>
 store <2 x i1> %d1, <2 x i1>* %a1
 ret void
}
define void @load_v8i1_broadcast_7_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v8i1_broadcast_7_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
; AVX512-NEXT: kshiftrb $7, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
; AVX512NOTDQ-NEXT: kshiftrw $7, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Extract bit 7 (the top element) of a loaded <8 x i1> mask and store it as
; <1 x i1> to %a1.
 %d0 = load <8 x i1>, <8 x i1>* %a0
 %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<1 x i32><i32 7>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
define void @load_v8i1_broadcast_7_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
; AVX512-LABEL: load_v8i1_broadcast_7_v2i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
; AVX512-NEXT: kshiftrb $6, %k0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512-NEXT: vpmovq2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v2i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
; AVX512NOTDQ-NEXT: kshiftrw $6, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 7 (the top element) of a loaded <8 x i1> mask to both lanes
; of a <2 x i1> and store it to %a1.
 %d0 = load <8 x i1>, <8 x i1>* %a0
 %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<2 x i32><i32 7,i32 7>
 store <2 x i1> %d1, <2 x i1>* %a1
 ret void
}
define void @load_v16i1_broadcast_8_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v16i1_broadcast_8_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw (%rdi), %k0
; AVX512-NEXT: kshiftrw $8, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Extract bit 8 of a loaded <16 x i1> mask and store it as <1 x i1> to %a1.
 %d0 = load <16 x i1>, <16 x i1>* %a0
 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<1 x i32><i32 8>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
define void @load_v16i1_broadcast_8_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
; AVX512-LABEL: load_v16i1_broadcast_8_v2i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw (%rdi), %k0
; AVX512-NEXT: kshiftrw $8, %k0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm0
; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512-NEXT: vpmovq2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v2i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 8 of a loaded <16 x i1> mask to both lanes of a <2 x i1> and
; store it to %a1.
 %d0 = load <16 x i1>, <16 x i1>* %a0
 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<2 x i32><i32 8,i32 8>
 store <2 x i1> %d1, <2 x i1>* %a1
 ret void
}
define void @load_v16i1_broadcast_8_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
; AVX512-LABEL: load_v16i1_broadcast_8_v4i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw (%rdi), %k0
; AVX512-NEXT: kshiftrw $8, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX512-NEXT: vpmovd2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v4i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 8 of a loaded <16 x i1> mask to all four lanes of a <4 x i1>
; and store it to %a1.
 %d0 = load <16 x i1>, <16 x i1>* %a0
 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<4 x i32><i32 8,i32 8,i32 8,i32 8>
 store <4 x i1> %d1, <4 x i1>* %a1
 ret void
}
define void @load_v16i1_broadcast_15_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v16i1_broadcast_15_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw (%rdi), %k0
; AVX512-NEXT: kshiftrw $15, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Extract bit 15 (the top element) of a loaded <16 x i1> mask and store it as
; <1 x i1> to %a1.
 %d0 = load <16 x i1>, <16 x i1>* %a0
 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<1 x i32><i32 15>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
define void @load_v16i1_broadcast_15_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
; AVX512-LABEL: load_v16i1_broadcast_15_v2i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw (%rdi), %k0
; AVX512-NEXT: kshiftrw $14, %k0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512-NEXT: vpmovq2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v2i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrw $14, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 15 (the top element) of a loaded <16 x i1> mask to both lanes
; of a <2 x i1> and store it to %a1.
 %d0 = load <16 x i1>, <16 x i1>* %a0
 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<2 x i32><i32 15,i32 15>
 store <2 x i1> %d1, <2 x i1>* %a1
 ret void
}
define void @load_v16i1_broadcast_15_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
; AVX512-LABEL: load_v16i1_broadcast_15_v4i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw (%rdi), %k0
; AVX512-NEXT: kshiftrw $12, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512-NEXT: vpmovd2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v4i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrw $12, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 15 (the top element) of a loaded <16 x i1> mask to all four
; lanes of a <4 x i1> and store it to %a1.
 %d0 = load <16 x i1>, <16 x i1>* %a0
 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<4 x i32><i32 15,i32 15,i32 15,i32 15>
 store <4 x i1> %d1, <4 x i1>* %a1
 ret void
}
define void @load_v32i1_broadcast_16_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v32i1_broadcast_16_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $16, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Extract bit 16 of a loaded <32 x i1> mask and store it as <1 x i1> to %a1.
 %d0 = load <32 x i1>, <32 x i1>* %a0
 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<1 x i32><i32 16>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
define void @load_v32i1_broadcast_16_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
; AVX512-LABEL: load_v32i1_broadcast_16_v2i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $16, %k0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm0
; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512-NEXT: vpmovq2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v2i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 16 of a loaded <32 x i1> mask to both lanes of a <2 x i1> and
; store it to %a1.
 %d0 = load <32 x i1>, <32 x i1>* %a0
 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<2 x i32><i32 16,i32 16>
 store <2 x i1> %d1, <2 x i1>* %a1
 ret void
}
define void @load_v32i1_broadcast_16_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
; AVX512-LABEL: load_v32i1_broadcast_16_v4i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $16, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX512-NEXT: vpmovd2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v4i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 16 of a loaded <32 x i1> mask to all four lanes of a <4 x i1>
; and store it to %a1.
 %d0 = load <32 x i1>, <32 x i1>* %a0
 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<4 x i32><i32 16,i32 16,i32 16,i32 16>
 store <4 x i1> %d1, <4 x i1>* %a1
 ret void
}
define void @load_v32i1_broadcast_16_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
; AVX512-LABEL: load_v32i1_broadcast_16_v8i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $16, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %ymm0
; AVX512-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX512-NEXT: vpmovd2m %ymm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 16 of a loaded <32 x i1> mask to all eight lanes of an
; <8 x i1> and store it to %a1.
 %d0 = load <32 x i1>, <32 x i1>* %a0
 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<8 x i32><i32 16,i32 16,i32 16,i32 16,i32 16,i32 16,i32 16,i32 16>
 store <8 x i1> %d1, <8 x i1>* %a1
 ret void
}
define void @load_v32i1_broadcast_31_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v32i1_broadcast_31_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $31, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrd $31, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Extract bit 31 (the top element) of a loaded <32 x i1> mask and store it as
; <1 x i1> to %a1.
 %d0 = load <32 x i1>, <32 x i1>* %a0
 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<1 x i32><i32 31>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
define void @load_v32i1_broadcast_31_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
; AVX512-LABEL: load_v32i1_broadcast_31_v2i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $30, %k0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512-NEXT: vpmovq2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v2i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrd $30, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 31 (the top element) of a loaded <32 x i1> mask to both lanes
; of a <2 x i1> and store it to %a1.
 %d0 = load <32 x i1>, <32 x i1>* %a0
 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<2 x i32><i32 31,i32 31>
 store <2 x i1> %d1, <2 x i1>* %a1
 ret void
}
define void @load_v32i1_broadcast_31_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
; AVX512-LABEL: load_v32i1_broadcast_31_v4i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $28, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512-NEXT: vpmovd2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v4i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrd $28, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 31 (the top element) of a loaded <32 x i1> mask to all four
; lanes of a <4 x i1> and store it to %a1.
 %d0 = load <32 x i1>, <32 x i1>* %a0
 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<4 x i32><i32 31,i32 31,i32 31,i32 31>
 store <4 x i1> %d1, <4 x i1>* %a1
 ret void
}
define void @load_v32i1_broadcast_31_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
; AVX512-LABEL: load_v32i1_broadcast_31_v8i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $24, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %ymm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
; AVX512-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vpmovd2m %ymm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrd $24, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
; AVX512NOTDQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 31 (the top element) of a loaded <32 x i1> mask to all eight
; lanes of an <8 x i1> and store it to %a1.
 %d0 = load <32 x i1>, <32 x i1>* %a0
 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<8 x i32><i32 31,i32 31,i32 31,i32 31,i32 31,i32 31,i32 31,i32 31>
 store <8 x i1> %d1, <8 x i1>* %a1
 ret void
}
define void @load_v64i1_broadcast_32_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_32_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $32, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v1i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Extract bit 32 of a loaded <64 x i1> mask and store it as <1 x i1> to %a1.
 %d0 = load <64 x i1>, <64 x i1>* %a0
 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<1 x i32><i32 32>
 store <1 x i1> %d1, <1 x i1>* %a1
 ret void
}
define void @load_v64i1_broadcast_32_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_32_v2i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $32, %k0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm0
; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512-NEXT: vpmovq2m %xmm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v2i1_store:
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: retq
; Broadcast bit 32 of a loaded <64 x i1> mask to both lanes of a <2 x i1> and
; store it to %a1.
 %d0 = load <64 x i1>, <64 x i1>* %a0
 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<2 x i32><i32 32,i32 32>
 store <2 x i1> %d1, <2 x i1>* %a1
 ret void
}
; Extract bit 32 of a loaded <64 x i1> mask, splat it to <4 x i1>, and store.
define void @load_v64i1_broadcast_32_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_32_v4i1_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $32, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm0
; AVX512-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX512-NEXT:    vpmovd2m %xmm0, %k0
; AVX512-NEXT:    kmovb %k0, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v4i1_store:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX512NOTDQ-NEXT:    vptestmd %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT:    kmovd %k0, %eax
; AVX512NOTDQ-NEXT:    movb %al, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<4 x i32><i32 32,i32 32,i32 32,i32 32>
    store <4 x i1> %d1, <4 x i1>* %a1
    ret void
}
; Extract bit 32 of a loaded <64 x i1> mask, splat it to <8 x i1>, and store.
define void @load_v64i1_broadcast_32_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_32_v8i1_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $32, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm0
; AVX512-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX512-NEXT:    vpmovd2m %ymm0, %k0
; AVX512-NEXT:    kmovb %k0, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1_store:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX512NOTDQ-NEXT:    vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT:    kmovd %k0, %eax
; AVX512NOTDQ-NEXT:    movb %al, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<8 x i32><i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32>
    store <8 x i1> %d1, <8 x i1>* %a1
    ret void
}
; Extract bit 32 of a loaded <64 x i1> mask, splat it to <16 x i1>, and store.
define void @load_v64i1_broadcast_32_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_32_v16i1_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $32, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm0
; AVX512-NEXT:    vpbroadcastd %xmm0, %zmm0
; AVX512-NEXT:    vpmovd2m %zmm0, %k0
; AVX512-NEXT:    kmovw %k0, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v16i1_store:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
; AVX512NOTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm0, %zmm0
; AVX512NOTDQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512NOTDQ-NEXT:    kmovw %k0, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<16 x i32><i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32>
    store <16 x i1> %d1, <16 x i1>* %a1
    ret void
}
; Extract the top bit (63) of a loaded <64 x i1> mask as <1 x i1> and store it.
define void @load_v64i1_broadcast_63_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_63_v1i1_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $63, %k0, %k0
; AVX512-NEXT:    kmovb %k0, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v1i1_store:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $63, %k0, %k0
; AVX512NOTDQ-NEXT:    kmovd %k0, %eax
; AVX512NOTDQ-NEXT:    movb %al, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<1 x i32><i32 63>
    store <1 x i1> %d1, <1 x i1>* %a1
    ret void
}
; Extract bit 63 of a loaded <64 x i1> mask, splat it to <2 x i1>, and store.
define void @load_v64i1_broadcast_63_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_63_v2i1_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $62, %k0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm0, %k0
; AVX512-NEXT:    kmovb %k0, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v2i1_store:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $62, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512NOTDQ-NEXT:    vptestmq %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT:    kmovd %k0, %eax
; AVX512NOTDQ-NEXT:    movb %al, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<2 x i32><i32 63,i32 63>
    store <2 x i1> %d1, <2 x i1>* %a1
    ret void
}
; Extract bit 63 of a loaded <64 x i1> mask, splat it to <4 x i1>, and store.
define void @load_v64i1_broadcast_63_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_63_v4i1_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $60, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512-NEXT:    vpmovd2m %xmm0, %k0
; AVX512-NEXT:    kmovb %k0, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v4i1_store:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $60, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512NOTDQ-NEXT:    vptestmd %xmm0, %xmm0, %k0
; AVX512NOTDQ-NEXT:    kmovd %k0, %eax
; AVX512NOTDQ-NEXT:    movb %al, (%rsi)
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<4 x i32><i32 63,i32 63,i32 63,i32 63>
    store <4 x i1> %d1, <4 x i1>* %a1
    ret void
}
; Extract bit 63 of a loaded <64 x i1> mask, splat it to <8 x i1>, and store.
define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_63_v8i1_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $56, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm0
; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
; AVX512-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    vpmovd2m %ymm0, %k0
; AVX512-NEXT:    kmovb %k0, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1_store:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $56, %k0, %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
; AVX512NOTDQ-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX512NOTDQ-NEXT:    vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT:    kmovd %k0, %eax
; AVX512NOTDQ-NEXT:    movb %al, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<8 x i32><i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63>
    store <8 x i1> %d1, <8 x i1>* %a1
    ret void
}
; Extract bit 63 of a loaded <64 x i1> mask, splat it to <16 x i1>, and store.
define void @load_v64i1_broadcast_63_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1) {
; AVX512-LABEL: load_v64i1_broadcast_63_v16i1_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq (%rdi), %k0
; AVX512-NEXT:    kshiftrq $48, %k0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm0
; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512-NEXT:    vpermd %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    vpmovd2m %zmm0, %k0
; AVX512-NEXT:    kmovw %k0, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v16i1_store:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT:    kshiftrq $48, %k0, %k1
; AVX512NOTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512NOTDQ-NEXT:    vpermd %zmm0, %zmm1, %zmm0
; AVX512NOTDQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512NOTDQ-NEXT:    kmovw %k0, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
    %d0 = load <64 x i1>, <64 x i1>* %a0
    %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<16 x i32><i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63>
    store <16 x i1> %d1, <16 x i1>* %a1
    ret void
}
1426