; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq,+fast-variable-shuffle -O2 | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+fast-variable-shuffle -O2 | FileCheck %s --check-prefix=AVX512NOTDQ
; Broadcast element 4 of a loaded <8 x i1> mask into a <2 x i1> select mask.
define void @load_v8i1_broadcast_4_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v8i1_broadcast_4_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 4(%rdi), %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 4(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <8 x i1>, <8 x i1>* %a0
  %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<2 x i32><i32 4,i32 4>
  %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
  store <2 x double> %d2, <2 x double>* %a3
  ret void
}
; Broadcast element 7 of a loaded <8 x i1> mask into a <2 x i1> select mask.
define void @load_v8i1_broadcast_7_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v8i1_broadcast_7_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 6(%rdi), %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 6(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <8 x i1>, <8 x i1>* %a0
  %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<2 x i32><i32 7,i32 7>
  %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
  store <2 x double> %d2, <2 x double>* %a3
  ret void
}
; Broadcast element 8 of a loaded <16 x i1> mask into a <2 x i1> select mask.
define void @load_v16i1_broadcast_8_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v16i1_broadcast_8_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 8(%rdi), %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 8(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <16 x i1>, <16 x i1>* %a0
  %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<2 x i32><i32 8,i32 8>
  %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
  store <2 x double> %d2, <2 x double>* %a3
  ret void
}
; Broadcast element 8 of a loaded <16 x i1> mask into a <4 x i1> select mask.
define void @load_v16i1_broadcast_8_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v16i1_broadcast_8_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 8(%rdi), %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 8(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <16 x i1>, <16 x i1>* %a0
  %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<4 x i32><i32 8,i32 8,i32 8,i32 8>
  %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
  store <4 x float> %d2, <4 x float>* %a3
  ret void
}
; Broadcast element 15 of a loaded <16 x i1> mask into a <2 x i1> select mask.
define void @load_v16i1_broadcast_15_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v16i1_broadcast_15_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 14(%rdi), %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 14(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <16 x i1>, <16 x i1>* %a0
  %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<2 x i32><i32 15,i32 15>
  %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
  store <2 x double> %d2, <2 x double>* %a3
  ret void
}
; Broadcast element 15 of a loaded <16 x i1> mask into a <4 x i1> select mask.
define void @load_v16i1_broadcast_15_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v16i1_broadcast_15_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 12(%rdi), %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 12(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <16 x i1>, <16 x i1>* %a0
  %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<4 x i32><i32 15,i32 15,i32 15,i32 15>
  %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
  store <4 x float> %d2, <4 x float>* %a3
  ret void
}
; Broadcast element 16 of a loaded <32 x i1> mask into a <2 x i1> select mask.
define void @load_v32i1_broadcast_16_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_16_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 16(%rdi), %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 16(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <32 x i1>, <32 x i1>* %a0
  %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<2 x i32><i32 16,i32 16>
  %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
  store <2 x double> %d2, <2 x double>* %a3
  ret void
}
; Broadcast element 16 of a loaded <32 x i1> mask into a <4 x i1> select mask.
define void @load_v32i1_broadcast_16_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_16_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 16(%rdi), %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 16(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <32 x i1>, <32 x i1>* %a0
  %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<4 x i32><i32 16,i32 16,i32 16,i32 16>
  %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
  store <4 x float> %d2, <4 x float>* %a3
  ret void
}
; Broadcast element 16 of a loaded <32 x i1> mask into an <8 x i1> select mask (ymm path).
define void @load_v32i1_broadcast_16_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_16_v8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 16(%rdi), %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512-NEXT:    vpmovd2m %ymm2, %k1
; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 16(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <32 x i1>, <32 x i1>* %a0
  %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<8 x i32><i32 16,i32 16,i32 16,i32 16,i32 16,i32 16,i32 16,i32 16>
  %d2 = select <8 x i1> %d1, <8 x float> %a1, <8 x float> %a2
  store <8 x float> %d2, <8 x float>* %a3
  ret void
}
; Broadcast element 31 of a loaded <32 x i1> mask into a <2 x i1> select mask.
define void @load_v32i1_broadcast_31_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_31_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 30(%rdi), %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 30(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <32 x i1>, <32 x i1>* %a0
  %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<2 x i32><i32 31,i32 31>
  %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
  store <2 x double> %d2, <2 x double>* %a3
  ret void
}
; Broadcast element 31 of a loaded <32 x i1> mask into a <4 x i1> select mask.
define void @load_v32i1_broadcast_31_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_31_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 28(%rdi), %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 28(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <32 x i1>, <32 x i1>* %a0
  %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<4 x i32><i32 31,i32 31,i32 31,i32 31>
  %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
  store <4 x float> %d2, <4 x float>* %a3
  ret void
}
; Broadcast element 31 of a loaded <32 x i1> mask into an <8 x i1> select mask (needs vpermd).
define void @load_v32i1_broadcast_31_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
; AVX512-LABEL: load_v32i1_broadcast_31_v8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 24(%rdi), %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm2
; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
; AVX512-NEXT:    vpermd %ymm2, %ymm3, %ymm2
; AVX512-NEXT:    vpmovd2m %ymm2, %k1
; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 24(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
; AVX512NOTDQ-NEXT:    vpermd %ymm2, %ymm3, %ymm2
; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <32 x i1>, <32 x i1>* %a0
  %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<8 x i32><i32 31,i32 31,i32 31,i32 31,i32 31,i32 31,i32 31,i32 31>
  %d2 = select <8 x i1> %d1, <8 x float> %a1, <8 x float> %a2
  store <8 x float> %d2, <8 x float>* %a3
  ret void
}
; Broadcast element 32 of a loaded <64 x i1> mask into a <2 x i1> select mask.
define void @load_v64i1_broadcast_32_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_32_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 32(%rdi), %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 32(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <64 x i1>, <64 x i1>* %a0
  %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<2 x i32><i32 32,i32 32>
  %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
  store <2 x double> %d2, <2 x double>* %a3
  ret void
}
; Broadcast element 32 of a loaded <64 x i1> mask into a <4 x i1> select mask.
define void @load_v64i1_broadcast_32_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_32_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 32(%rdi), %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 32(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <64 x i1>, <64 x i1>* %a0
  %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<4 x i32><i32 32,i32 32,i32 32,i32 32>
  %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
  store <4 x float> %d2, <4 x float>* %a3
  ret void
}
; Broadcast element 32 of a loaded <64 x i1> mask into an <8 x i1> select mask (ymm path).
define void @load_v64i1_broadcast_32_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_32_v8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 32(%rdi), %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512-NEXT:    vpmovd2m %ymm2, %k1
; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 32(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <64 x i1>, <64 x i1>* %a0
  %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<8 x i32><i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32>
  %d2 = select <8 x i1> %d1, <8 x float> %a1, <8 x float> %a2
  store <8 x float> %d2, <8 x float>* %a3
  ret void
}
; Broadcast element 32 of a loaded <64 x i1> mask into a <16 x i1> select mask (zmm path).
define void @load_v64i1_broadcast_32_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x float> %a2,<16 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_32_v16i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovw 32(%rdi), %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm2
; AVX512-NEXT:    vpbroadcastd %xmm2, %zmm2
; AVX512-NEXT:    vpmovd2m %zmm2, %k1
; AVX512-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; AVX512-NEXT:    vmovaps %zmm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v16i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 32(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %zmm2
; AVX512NOTDQ-NEXT:    vptestmd %zmm2, %zmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %zmm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <64 x i1>, <64 x i1>* %a0
  %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<16 x i32><i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32>
  %d2 = select <16 x i1> %d1, <16 x float> %a1, <16 x float> %a2
  store <16 x float> %d2, <16 x float>* %a3
  ret void
}
; Broadcast element 63 of a loaded <64 x i1> mask into a <2 x i1> select mask.
define void @load_v64i1_broadcast_63_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_63_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 62(%rdi), %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT:    vpmovq2m %xmm2, %k1
; AVX512-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v2i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 62(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa64 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512NOTDQ-NEXT:    vptestmq %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovapd %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovapd %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <64 x i1>, <64 x i1>* %a0
  %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<2 x i32><i32 63,i32 63>
  %d2 = select <2 x i1> %d1, <2 x double> %a1, <2 x double> %a2
  store <2 x double> %d2, <2 x double>* %a3
  ret void
}
; Broadcast element 63 of a loaded <64 x i1> mask into a <4 x i1> select mask.
define void @load_v64i1_broadcast_63_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_63_v4i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 60(%rdi), %k0
; AVX512-NEXT:    vpmovm2d %k0, %xmm2
; AVX512-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512-NEXT:    vpmovd2m %xmm2, %k1
; AVX512-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v4i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 60(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %xmm2, %xmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX512NOTDQ-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %xmm0, %xmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %xmm1, (%rsi)
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <64 x i1>, <64 x i1>* %a0
  %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<4 x i32><i32 63,i32 63,i32 63,i32 63>
  %d2 = select <4 x i1> %d1, <4 x float> %a1, <4 x float> %a2
  store <4 x float> %d2, <4 x float>* %a3
  ret void
}
; Broadcast element 63 of a loaded <64 x i1> mask into an <8 x i1> select mask (needs vpermd).
define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_63_v8i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb 56(%rdi), %k0
; AVX512-NEXT:    vpmovm2d %k0, %ymm2
; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
; AVX512-NEXT:    vpermd %ymm2, %ymm3, %ymm2
; AVX512-NEXT:    vpmovd2m %ymm2, %k1
; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 56(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
; AVX512NOTDQ-NEXT:    vpermd %ymm2, %ymm3, %ymm2
; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <64 x i1>, <64 x i1>* %a0
  %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<8 x i32><i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63>
  %d2 = select <8 x i1> %d1, <8 x float> %a1, <8 x float> %a2
  store <8 x float> %d2, <8 x float>* %a3
  ret void
}
; Broadcast element 63 of a loaded <64 x i1> mask into a <16 x i1> select mask (zmm vpermd).
define void @load_v64i1_broadcast_63_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x float> %a2,<16 x float>* %a3) {
; AVX512-LABEL: load_v64i1_broadcast_63_v16i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovw 48(%rdi), %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm2
; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512-NEXT:    vpermd %zmm2, %zmm3, %zmm2
; AVX512-NEXT:    vpmovd2m %zmm2, %k1
; AVX512-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; AVX512-NEXT:    vmovaps %zmm1, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v16i1:
; AVX512NOTDQ:       # %bb.0:
; AVX512NOTDQ-NEXT:    kmovw 48(%rdi), %k1
; AVX512NOTDQ-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512NOTDQ-NEXT:    vpermd %zmm2, %zmm3, %zmm2
; AVX512NOTDQ-NEXT:    vptestmd %zmm2, %zmm2, %k1
; AVX512NOTDQ-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; AVX512NOTDQ-NEXT:    vmovaps %zmm1, (%rsi)
; AVX512NOTDQ-NEXT:    vzeroupper
; AVX512NOTDQ-NEXT:    retq
  %d0 = load <64 x i1>, <64 x i1>* %a0
  %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<16 x i32><i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63>
  %d2 = select <16 x i1> %d1, <16 x float> %a1, <16 x float> %a2
  store <16 x float> %d2, <16 x float>* %a3
  ret void
}
561define void @load_v2i1_broadcast_1_v1i1_store(<2 x i1>* %a0,<1 x i1>* %a1) {
562; AVX512-LABEL: load_v2i1_broadcast_1_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000563; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000564; AVX512-NEXT: kmovb 1(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000565; AVX512-NEXT: kmovb %k0, (%rsi)
566; AVX512-NEXT: retq
567;
568; AVX512NOTDQ-LABEL: load_v2i1_broadcast_1_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000569; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000570; AVX512NOTDQ-NEXT: movb 1(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000571; AVX512NOTDQ-NEXT: movb %al, (%rsi)
572; AVX512NOTDQ-NEXT: retq
573 %d0 = load <2 x i1>, <2 x i1>* %a0
574 %d1 = shufflevector <2 x i1> %d0,<2 x i1> undef,<1 x i32><i32 1>
575 store <1 x i1> %d1, <1 x i1>* %a1
576 ret void
577}
578define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
579; AVX512-LABEL: load_v3i1_broadcast_1_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000580; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000581; AVX512-NEXT: kmovb 1(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000582; AVX512-NEXT: kmovb %k0, (%rsi)
583; AVX512-NEXT: retq
584;
585; AVX512NOTDQ-LABEL: load_v3i1_broadcast_1_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000586; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000587; AVX512NOTDQ-NEXT: movb 1(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000588; AVX512NOTDQ-NEXT: movb %al, (%rsi)
589; AVX512NOTDQ-NEXT: retq
590 %d0 = load <3 x i1>, <3 x i1>* %a0
591 %d1 = shufflevector <3 x i1> %d0,<3 x i1> undef,<1 x i32><i32 1>
592 store <1 x i1> %d1, <1 x i1>* %a1
593 ret void
594}
595define void @load_v3i1_broadcast_2_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
596; AVX512-LABEL: load_v3i1_broadcast_2_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000597; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000598; AVX512-NEXT: kmovb 2(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000599; AVX512-NEXT: kmovb %k0, (%rsi)
600; AVX512-NEXT: retq
601;
602; AVX512NOTDQ-LABEL: load_v3i1_broadcast_2_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000603; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000604; AVX512NOTDQ-NEXT: movb 2(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000605; AVX512NOTDQ-NEXT: movb %al, (%rsi)
606; AVX512NOTDQ-NEXT: retq
607 %d0 = load <3 x i1>, <3 x i1>* %a0
608 %d1 = shufflevector <3 x i1> %d0,<3 x i1> undef,<1 x i32><i32 2>
609 store <1 x i1> %d1, <1 x i1>* %a1
610 ret void
611}
612define void @load_v4i1_broadcast_2_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
613; AVX512-LABEL: load_v4i1_broadcast_2_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000614; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000615; AVX512-NEXT: kmovb 2(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000616; AVX512-NEXT: kmovb %k0, (%rsi)
617; AVX512-NEXT: retq
618;
619; AVX512NOTDQ-LABEL: load_v4i1_broadcast_2_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000620; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000621; AVX512NOTDQ-NEXT: movb 2(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000622; AVX512NOTDQ-NEXT: movb %al, (%rsi)
623; AVX512NOTDQ-NEXT: retq
624 %d0 = load <4 x i1>, <4 x i1>* %a0
625 %d1 = shufflevector <4 x i1> %d0,<4 x i1> undef,<1 x i32><i32 2>
626 store <1 x i1> %d1, <1 x i1>* %a1
627 ret void
628}
629define void @load_v4i1_broadcast_3_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
630; AVX512-LABEL: load_v4i1_broadcast_3_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000631; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000632; AVX512-NEXT: kmovb 3(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000633; AVX512-NEXT: kmovb %k0, (%rsi)
634; AVX512-NEXT: retq
635;
636; AVX512NOTDQ-LABEL: load_v4i1_broadcast_3_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000637; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000638; AVX512NOTDQ-NEXT: movb 3(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000639; AVX512NOTDQ-NEXT: movb %al, (%rsi)
640; AVX512NOTDQ-NEXT: retq
641 %d0 = load <4 x i1>, <4 x i1>* %a0
642 %d1 = shufflevector <4 x i1> %d0,<4 x i1> undef,<1 x i32><i32 3>
643 store <1 x i1> %d1, <1 x i1>* %a1
644 ret void
645}
646define void @load_v8i1_broadcast_4_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
647; AVX512-LABEL: load_v8i1_broadcast_4_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000648; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000649; AVX512-NEXT: kmovb 4(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000650; AVX512-NEXT: kmovb %k0, (%rsi)
651; AVX512-NEXT: retq
652;
653; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000654; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000655; AVX512NOTDQ-NEXT: movb 4(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000656; AVX512NOTDQ-NEXT: movb %al, (%rsi)
657; AVX512NOTDQ-NEXT: retq
658 %d0 = load <8 x i1>, <8 x i1>* %a0
659 %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<1 x i32><i32 4>
660 store <1 x i1> %d1, <1 x i1>* %a1
661 ret void
662}
663define void @load_v8i1_broadcast_4_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
664; AVX512-LABEL: load_v8i1_broadcast_4_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000665; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000666; AVX512-NEXT: kmovb 4(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000667; AVX512-NEXT: vpmovm2q %k0, %xmm0
668; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
669; AVX512-NEXT: vpmovq2m %xmm0, %k0
670; AVX512-NEXT: kmovb %k0, (%rsi)
671; AVX512-NEXT: retq
672;
673; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000674; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +0000675; AVX512NOTDQ-NEXT: kmovw 4(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000676; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
677; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
678; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000679; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
680; AVX512NOTDQ-NEXT: kmovd %k0, %eax
681; AVX512NOTDQ-NEXT: movb %al, (%rsi)
682; AVX512NOTDQ-NEXT: retq
683 %d0 = load <8 x i1>, <8 x i1>* %a0
684 %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<2 x i32><i32 4,i32 4>
685 store <2 x i1> %d1, <2 x i1>* %a1
686 ret void
687}
688define void @load_v8i1_broadcast_7_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
689; AVX512-LABEL: load_v8i1_broadcast_7_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000690; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000691; AVX512-NEXT: kmovb 7(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000692; AVX512-NEXT: kmovb %k0, (%rsi)
693; AVX512-NEXT: retq
694;
695; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000696; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000697; AVX512NOTDQ-NEXT: movb 7(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000698; AVX512NOTDQ-NEXT: movb %al, (%rsi)
699; AVX512NOTDQ-NEXT: retq
700 %d0 = load <8 x i1>, <8 x i1>* %a0
701 %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<1 x i32><i32 7>
702 store <1 x i1> %d1, <1 x i1>* %a1
703 ret void
704}
705define void @load_v8i1_broadcast_7_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
706; AVX512-LABEL: load_v8i1_broadcast_7_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000707; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000708; AVX512-NEXT: kmovb 6(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000709; AVX512-NEXT: vpmovm2q %k0, %xmm0
710; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
711; AVX512-NEXT: vpmovq2m %xmm0, %k0
712; AVX512-NEXT: kmovb %k0, (%rsi)
713; AVX512-NEXT: retq
714;
715; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000716; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +0000717; AVX512NOTDQ-NEXT: kmovw 6(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000718; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
719; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
720; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000721; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
722; AVX512NOTDQ-NEXT: kmovd %k0, %eax
723; AVX512NOTDQ-NEXT: movb %al, (%rsi)
724; AVX512NOTDQ-NEXT: retq
725 %d0 = load <8 x i1>, <8 x i1>* %a0
726 %d1 = shufflevector <8 x i1> %d0,<8 x i1> undef,<2 x i32><i32 7,i32 7>
727 store <2 x i1> %d1, <2 x i1>* %a1
728 ret void
729}
730define void @load_v16i1_broadcast_8_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
731; AVX512-LABEL: load_v16i1_broadcast_8_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000732; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000733; AVX512-NEXT: kmovb 8(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000734; AVX512-NEXT: kmovb %k0, (%rsi)
735; AVX512-NEXT: retq
736;
737; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000738; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000739; AVX512NOTDQ-NEXT: movb 8(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000740; AVX512NOTDQ-NEXT: movb %al, (%rsi)
741; AVX512NOTDQ-NEXT: retq
742 %d0 = load <16 x i1>, <16 x i1>* %a0
743 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<1 x i32><i32 8>
744 store <1 x i1> %d1, <1 x i1>* %a1
745 ret void
746}
747define void @load_v16i1_broadcast_8_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
748; AVX512-LABEL: load_v16i1_broadcast_8_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000749; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000750; AVX512-NEXT: kmovb 8(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000751; AVX512-NEXT: vpmovm2q %k0, %xmm0
752; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
753; AVX512-NEXT: vpmovq2m %xmm0, %k0
754; AVX512-NEXT: kmovb %k0, (%rsi)
755; AVX512-NEXT: retq
756;
757; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000758; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +0000759; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000760; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
761; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
762; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000763; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
764; AVX512NOTDQ-NEXT: kmovd %k0, %eax
765; AVX512NOTDQ-NEXT: movb %al, (%rsi)
766; AVX512NOTDQ-NEXT: retq
767 %d0 = load <16 x i1>, <16 x i1>* %a0
768 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<2 x i32><i32 8,i32 8>
769 store <2 x i1> %d1, <2 x i1>* %a1
770 ret void
771}
772define void @load_v16i1_broadcast_8_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
773; AVX512-LABEL: load_v16i1_broadcast_8_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000774; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000775; AVX512-NEXT: kmovb 8(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000776; AVX512-NEXT: vpmovm2d %k0, %xmm0
777; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
778; AVX512-NEXT: vpmovd2m %xmm0, %k0
779; AVX512-NEXT: kmovb %k0, (%rsi)
780; AVX512-NEXT: retq
781;
782; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000783; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +0000784; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000785; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
786; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
787; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000788; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
789; AVX512NOTDQ-NEXT: kmovd %k0, %eax
790; AVX512NOTDQ-NEXT: movb %al, (%rsi)
791; AVX512NOTDQ-NEXT: retq
792 %d0 = load <16 x i1>, <16 x i1>* %a0
793 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<4 x i32><i32 8,i32 8,i32 8,i32 8>
794 store <4 x i1> %d1, <4 x i1>* %a1
795 ret void
796}
797define void @load_v16i1_broadcast_15_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
798; AVX512-LABEL: load_v16i1_broadcast_15_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000799; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000800; AVX512-NEXT: kmovb 15(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000801; AVX512-NEXT: kmovb %k0, (%rsi)
802; AVX512-NEXT: retq
803;
804; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000805; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000806; AVX512NOTDQ-NEXT: movb 15(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000807; AVX512NOTDQ-NEXT: movb %al, (%rsi)
808; AVX512NOTDQ-NEXT: retq
809 %d0 = load <16 x i1>, <16 x i1>* %a0
810 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<1 x i32><i32 15>
811 store <1 x i1> %d1, <1 x i1>* %a1
812 ret void
813}
814define void @load_v16i1_broadcast_15_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
815; AVX512-LABEL: load_v16i1_broadcast_15_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000816; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000817; AVX512-NEXT: kmovb 14(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000818; AVX512-NEXT: vpmovm2q %k0, %xmm0
819; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
820; AVX512-NEXT: vpmovq2m %xmm0, %k0
821; AVX512-NEXT: kmovb %k0, (%rsi)
822; AVX512-NEXT: retq
823;
824; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000825; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +0000826; AVX512NOTDQ-NEXT: kmovw 14(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000827; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
828; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
829; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000830; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
831; AVX512NOTDQ-NEXT: kmovd %k0, %eax
832; AVX512NOTDQ-NEXT: movb %al, (%rsi)
833; AVX512NOTDQ-NEXT: retq
834 %d0 = load <16 x i1>, <16 x i1>* %a0
835 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<2 x i32><i32 15,i32 15>
836 store <2 x i1> %d1, <2 x i1>* %a1
837 ret void
838}
839define void @load_v16i1_broadcast_15_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
840; AVX512-LABEL: load_v16i1_broadcast_15_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000841; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000842; AVX512-NEXT: kmovb 12(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000843; AVX512-NEXT: vpmovm2d %k0, %xmm0
844; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
845; AVX512-NEXT: vpmovd2m %xmm0, %k0
846; AVX512-NEXT: kmovb %k0, (%rsi)
847; AVX512-NEXT: retq
848;
849; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000850; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +0000851; AVX512NOTDQ-NEXT: kmovw 12(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000852; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
853; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
854; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000855; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
856; AVX512NOTDQ-NEXT: kmovd %k0, %eax
857; AVX512NOTDQ-NEXT: movb %al, (%rsi)
858; AVX512NOTDQ-NEXT: retq
859 %d0 = load <16 x i1>, <16 x i1>* %a0
860 %d1 = shufflevector <16 x i1> %d0,<16 x i1> undef,<4 x i32><i32 15,i32 15,i32 15,i32 15>
861 store <4 x i1> %d1, <4 x i1>* %a1
862 ret void
863}
864define void @load_v32i1_broadcast_16_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
865; AVX512-LABEL: load_v32i1_broadcast_16_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000866; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000867; AVX512-NEXT: kmovb 16(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000868; AVX512-NEXT: kmovb %k0, (%rsi)
869; AVX512-NEXT: retq
870;
871; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000872; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000873; AVX512NOTDQ-NEXT: movb 16(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000874; AVX512NOTDQ-NEXT: movb %al, (%rsi)
875; AVX512NOTDQ-NEXT: retq
876 %d0 = load <32 x i1>, <32 x i1>* %a0
877 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<1 x i32><i32 16>
878 store <1 x i1> %d1, <1 x i1>* %a1
879 ret void
880}
881define void @load_v32i1_broadcast_16_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
882; AVX512-LABEL: load_v32i1_broadcast_16_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000883; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000884; AVX512-NEXT: kmovb 16(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000885; AVX512-NEXT: vpmovm2q %k0, %xmm0
886; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
887; AVX512-NEXT: vpmovq2m %xmm0, %k0
888; AVX512-NEXT: kmovb %k0, (%rsi)
889; AVX512-NEXT: retq
890;
891; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000892; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +0000893; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000894; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
895; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
896; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000897; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
898; AVX512NOTDQ-NEXT: kmovd %k0, %eax
899; AVX512NOTDQ-NEXT: movb %al, (%rsi)
900; AVX512NOTDQ-NEXT: retq
901 %d0 = load <32 x i1>, <32 x i1>* %a0
902 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<2 x i32><i32 16,i32 16>
903 store <2 x i1> %d1, <2 x i1>* %a1
904 ret void
905}
906define void @load_v32i1_broadcast_16_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
907; AVX512-LABEL: load_v32i1_broadcast_16_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000908; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000909; AVX512-NEXT: kmovb 16(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000910; AVX512-NEXT: vpmovm2d %k0, %xmm0
911; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
912; AVX512-NEXT: vpmovd2m %xmm0, %k0
913; AVX512-NEXT: kmovb %k0, (%rsi)
914; AVX512-NEXT: retq
915;
916; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000917; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +0000918; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000919; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
920; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
921; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000922; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
923; AVX512NOTDQ-NEXT: kmovd %k0, %eax
924; AVX512NOTDQ-NEXT: movb %al, (%rsi)
925; AVX512NOTDQ-NEXT: retq
926 %d0 = load <32 x i1>, <32 x i1>* %a0
927 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<4 x i32><i32 16,i32 16,i32 16,i32 16>
928 store <4 x i1> %d1, <4 x i1>* %a1
929 ret void
930}
931define void @load_v32i1_broadcast_16_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
932; AVX512-LABEL: load_v32i1_broadcast_16_v8i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000933; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000934; AVX512-NEXT: kmovb 16(%rdi), %k0
Craig Topper410a2892017-12-21 18:44:06 +0000935; AVX512-NEXT: vpmovm2d %k0, %ymm0
936; AVX512-NEXT: vpbroadcastd %xmm0, %ymm0
937; AVX512-NEXT: vpmovd2m %ymm0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000938; AVX512-NEXT: kmovb %k0, (%rsi)
939; AVX512-NEXT: vzeroupper
940; AVX512-NEXT: retq
941;
942; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000943; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +0000944; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
Craig Topper410a2892017-12-21 18:44:06 +0000945; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
946; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
947; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
Craig Topper410a2892017-12-21 18:44:06 +0000948; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000949; AVX512NOTDQ-NEXT: kmovd %k0, %eax
950; AVX512NOTDQ-NEXT: movb %al, (%rsi)
951; AVX512NOTDQ-NEXT: vzeroupper
952; AVX512NOTDQ-NEXT: retq
953 %d0 = load <32 x i1>, <32 x i1>* %a0
954 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<8 x i32><i32 16,i32 16,i32 16,i32 16,i32 16,i32 16,i32 16,i32 16>
955 store <8 x i1> %d1, <8 x i1>* %a1
956 ret void
957}
958define void @load_v32i1_broadcast_31_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
959; AVX512-LABEL: load_v32i1_broadcast_31_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000960; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000961; AVX512-NEXT: kmovb 31(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000962; AVX512-NEXT: kmovb %k0, (%rsi)
963; AVX512-NEXT: retq
964;
965; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000966; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000967; AVX512NOTDQ-NEXT: movb 31(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000968; AVX512NOTDQ-NEXT: movb %al, (%rsi)
969; AVX512NOTDQ-NEXT: retq
970 %d0 = load <32 x i1>, <32 x i1>* %a0
971 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<1 x i32><i32 31>
972 store <1 x i1> %d1, <1 x i1>* %a1
973 ret void
974}
975define void @load_v32i1_broadcast_31_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
976; AVX512-LABEL: load_v32i1_broadcast_31_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000977; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +0000978; AVX512-NEXT: kmovb 30(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000979; AVX512-NEXT: vpmovm2q %k0, %xmm0
980; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
981; AVX512-NEXT: vpmovq2m %xmm0, %k0
982; AVX512-NEXT: kmovb %k0, (%rsi)
983; AVX512-NEXT: retq
984;
985; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000986; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +0000987; AVX512NOTDQ-NEXT: kmovw 30(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000988; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
989; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
990; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
Michael Zuckerman0c20b692017-11-02 12:19:36 +0000991; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
992; AVX512NOTDQ-NEXT: kmovd %k0, %eax
993; AVX512NOTDQ-NEXT: movb %al, (%rsi)
994; AVX512NOTDQ-NEXT: retq
995 %d0 = load <32 x i1>, <32 x i1>* %a0
996 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<2 x i32><i32 31,i32 31>
997 store <2 x i1> %d1, <2 x i1>* %a1
998 ret void
999}
1000define void @load_v32i1_broadcast_31_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
1001; AVX512-LABEL: load_v32i1_broadcast_31_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001002; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001003; AVX512-NEXT: kmovb 28(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001004; AVX512-NEXT: vpmovm2d %k0, %xmm0
1005; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
1006; AVX512-NEXT: vpmovd2m %xmm0, %k0
1007; AVX512-NEXT: kmovb %k0, (%rsi)
1008; AVX512-NEXT: retq
1009;
1010; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001011; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +00001012; AVX512NOTDQ-NEXT: kmovw 28(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001013; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1014; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1015; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001016; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
1017; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1018; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1019; AVX512NOTDQ-NEXT: retq
1020 %d0 = load <32 x i1>, <32 x i1>* %a0
1021 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<4 x i32><i32 31,i32 31,i32 31,i32 31>
1022 store <4 x i1> %d1, <4 x i1>* %a1
1023 ret void
1024}
1025define void @load_v32i1_broadcast_31_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
1026; AVX512-LABEL: load_v32i1_broadcast_31_v8i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001027; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001028; AVX512-NEXT: kmovb 24(%rdi), %k0
Craig Topper410a2892017-12-21 18:44:06 +00001029; AVX512-NEXT: vpmovm2d %k0, %ymm0
Zvi Rackover72b0bb12018-01-09 16:26:06 +00001030; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
1031; AVX512-NEXT: vpermd %ymm0, %ymm1, %ymm0
Craig Topper410a2892017-12-21 18:44:06 +00001032; AVX512-NEXT: vpmovd2m %ymm0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001033; AVX512-NEXT: kmovb %k0, (%rsi)
1034; AVX512-NEXT: vzeroupper
1035; AVX512-NEXT: retq
1036;
1037; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001038; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +00001039; AVX512NOTDQ-NEXT: kmovw 24(%rdi), %k1
Craig Topper410a2892017-12-21 18:44:06 +00001040; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
1041; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
Zvi Rackover72b0bb12018-01-09 16:26:06 +00001042; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
1043; AVX512NOTDQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
Craig Topper410a2892017-12-21 18:44:06 +00001044; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001045; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1046; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1047; AVX512NOTDQ-NEXT: vzeroupper
1048; AVX512NOTDQ-NEXT: retq
1049 %d0 = load <32 x i1>, <32 x i1>* %a0
1050 %d1 = shufflevector <32 x i1> %d0,<32 x i1> undef,<8 x i32><i32 31,i32 31,i32 31,i32 31,i32 31,i32 31,i32 31,i32 31>
1051 store <8 x i1> %d1, <8 x i1>* %a1
1052 ret void
1053}
1054define void @load_v64i1_broadcast_32_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
1055; AVX512-LABEL: load_v64i1_broadcast_32_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001056; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001057; AVX512-NEXT: kmovb 32(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001058; AVX512-NEXT: kmovb %k0, (%rsi)
1059; AVX512-NEXT: retq
1060;
1061; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001062; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001063; AVX512NOTDQ-NEXT: movb 32(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001064; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1065; AVX512NOTDQ-NEXT: retq
1066 %d0 = load <64 x i1>, <64 x i1>* %a0
1067 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<1 x i32><i32 32>
1068 store <1 x i1> %d1, <1 x i1>* %a1
1069 ret void
1070}
1071define void @load_v64i1_broadcast_32_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
1072; AVX512-LABEL: load_v64i1_broadcast_32_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001073; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001074; AVX512-NEXT: kmovb 32(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001075; AVX512-NEXT: vpmovm2q %k0, %xmm0
1076; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
1077; AVX512-NEXT: vpmovq2m %xmm0, %k0
1078; AVX512-NEXT: kmovb %k0, (%rsi)
1079; AVX512-NEXT: retq
1080;
1081; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001082; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +00001083; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001084; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1085; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
1086; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001087; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
1088; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1089; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1090; AVX512NOTDQ-NEXT: retq
1091 %d0 = load <64 x i1>, <64 x i1>* %a0
1092 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<2 x i32><i32 32,i32 32>
1093 store <2 x i1> %d1, <2 x i1>* %a1
1094 ret void
1095}
1096define void @load_v64i1_broadcast_32_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
1097; AVX512-LABEL: load_v64i1_broadcast_32_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001098; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001099; AVX512-NEXT: kmovb 32(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001100; AVX512-NEXT: vpmovm2d %k0, %xmm0
1101; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
1102; AVX512-NEXT: vpmovd2m %xmm0, %k0
1103; AVX512-NEXT: kmovb %k0, (%rsi)
1104; AVX512-NEXT: retq
1105;
1106; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001107; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +00001108; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001109; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1110; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1111; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001112; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
1113; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1114; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1115; AVX512NOTDQ-NEXT: retq
1116 %d0 = load <64 x i1>, <64 x i1>* %a0
1117 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<4 x i32><i32 32,i32 32,i32 32,i32 32>
1118 store <4 x i1> %d1, <4 x i1>* %a1
1119 ret void
1120}
1121define void @load_v64i1_broadcast_32_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
1122; AVX512-LABEL: load_v64i1_broadcast_32_v8i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001123; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001124; AVX512-NEXT: kmovb 32(%rdi), %k0
Craig Topper410a2892017-12-21 18:44:06 +00001125; AVX512-NEXT: vpmovm2d %k0, %ymm0
1126; AVX512-NEXT: vpbroadcastd %xmm0, %ymm0
1127; AVX512-NEXT: vpmovd2m %ymm0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001128; AVX512-NEXT: kmovb %k0, (%rsi)
1129; AVX512-NEXT: vzeroupper
1130; AVX512-NEXT: retq
1131;
1132; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001133; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +00001134; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
Craig Topper410a2892017-12-21 18:44:06 +00001135; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
1136; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
1137; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
Craig Topper410a2892017-12-21 18:44:06 +00001138; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001139; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1140; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1141; AVX512NOTDQ-NEXT: vzeroupper
1142; AVX512NOTDQ-NEXT: retq
1143 %d0 = load <64 x i1>, <64 x i1>* %a0
1144 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<8 x i32><i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32>
1145 store <8 x i1> %d1, <8 x i1>* %a1
1146 ret void
1147}
1148define void @load_v64i1_broadcast_32_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1) {
1149; AVX512-LABEL: load_v64i1_broadcast_32_v16i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001150; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001151; AVX512-NEXT: kmovw 32(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001152; AVX512-NEXT: vpmovm2d %k0, %zmm0
1153; AVX512-NEXT: vpbroadcastd %xmm0, %zmm0
1154; AVX512-NEXT: vpmovd2m %zmm0, %k0
1155; AVX512-NEXT: kmovw %k0, (%rsi)
1156; AVX512-NEXT: vzeroupper
1157; AVX512-NEXT: retq
1158;
1159; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v16i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001160; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001161; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001162; AVX512NOTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1163; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %zmm0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001164; AVX512NOTDQ-NEXT: vptestmd %zmm0, %zmm0, %k0
1165; AVX512NOTDQ-NEXT: kmovw %k0, (%rsi)
1166; AVX512NOTDQ-NEXT: vzeroupper
1167; AVX512NOTDQ-NEXT: retq
1168 %d0 = load <64 x i1>, <64 x i1>* %a0
1169 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<16 x i32><i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32,i32 32>
1170 store <16 x i1> %d1, <16 x i1>* %a1
1171 ret void
1172}
1173define void @load_v64i1_broadcast_63_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
1174; AVX512-LABEL: load_v64i1_broadcast_63_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001175; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001176; AVX512-NEXT: kmovb 63(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001177; AVX512-NEXT: kmovb %k0, (%rsi)
1178; AVX512-NEXT: retq
1179;
1180; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v1i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001181; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001182; AVX512NOTDQ-NEXT: movb 63(%rdi), %al
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001183; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1184; AVX512NOTDQ-NEXT: retq
1185 %d0 = load <64 x i1>, <64 x i1>* %a0
1186 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<1 x i32><i32 63>
1187 store <1 x i1> %d1, <1 x i1>* %a1
1188 ret void
1189}
1190define void @load_v64i1_broadcast_63_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
1191; AVX512-LABEL: load_v64i1_broadcast_63_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001192; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001193; AVX512-NEXT: kmovb 62(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001194; AVX512-NEXT: vpmovm2q %k0, %xmm0
1195; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1196; AVX512-NEXT: vpmovq2m %xmm0, %k0
1197; AVX512-NEXT: kmovb %k0, (%rsi)
1198; AVX512-NEXT: retq
1199;
1200; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v2i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001201; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +00001202; AVX512NOTDQ-NEXT: kmovw 62(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001203; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1204; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
1205; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001206; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
1207; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1208; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1209; AVX512NOTDQ-NEXT: retq
1210 %d0 = load <64 x i1>, <64 x i1>* %a0
1211 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<2 x i32><i32 63,i32 63>
1212 store <2 x i1> %d1, <2 x i1>* %a1
1213 ret void
1214}
1215define void @load_v64i1_broadcast_63_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
1216; AVX512-LABEL: load_v64i1_broadcast_63_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001217; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001218; AVX512-NEXT: kmovb 60(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001219; AVX512-NEXT: vpmovm2d %k0, %xmm0
1220; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
1221; AVX512-NEXT: vpmovd2m %xmm0, %k0
1222; AVX512-NEXT: kmovb %k0, (%rsi)
1223; AVX512-NEXT: retq
1224;
1225; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v4i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001226; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +00001227; AVX512NOTDQ-NEXT: kmovw 60(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001228; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1229; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1230; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001231; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
1232; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1233; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1234; AVX512NOTDQ-NEXT: retq
1235 %d0 = load <64 x i1>, <64 x i1>* %a0
1236 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<4 x i32><i32 63,i32 63,i32 63,i32 63>
1237 store <4 x i1> %d1, <4 x i1>* %a1
1238 ret void
1239}
1240define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
1241; AVX512-LABEL: load_v64i1_broadcast_63_v8i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001242; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001243; AVX512-NEXT: kmovb 56(%rdi), %k0
Craig Topper410a2892017-12-21 18:44:06 +00001244; AVX512-NEXT: vpmovm2d %k0, %ymm0
Zvi Rackover72b0bb12018-01-09 16:26:06 +00001245; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
1246; AVX512-NEXT: vpermd %ymm0, %ymm1, %ymm0
Craig Topper410a2892017-12-21 18:44:06 +00001247; AVX512-NEXT: vpmovd2m %ymm0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001248; AVX512-NEXT: kmovb %k0, (%rsi)
1249; AVX512-NEXT: vzeroupper
1250; AVX512-NEXT: retq
1251;
1252; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001253; AVX512NOTDQ: # %bb.0:
Craig Topperbf615252019-01-12 02:22:10 +00001254; AVX512NOTDQ-NEXT: kmovw 56(%rdi), %k1
Craig Topper410a2892017-12-21 18:44:06 +00001255; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
1256; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
Zvi Rackover72b0bb12018-01-09 16:26:06 +00001257; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
1258; AVX512NOTDQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
Craig Topper410a2892017-12-21 18:44:06 +00001259; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001260; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1261; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1262; AVX512NOTDQ-NEXT: vzeroupper
1263; AVX512NOTDQ-NEXT: retq
1264 %d0 = load <64 x i1>, <64 x i1>* %a0
1265 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<8 x i32><i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63>
1266 store <8 x i1> %d1, <8 x i1>* %a1
1267 ret void
1268}
1269define void @load_v64i1_broadcast_63_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1) {
1270; AVX512-LABEL: load_v64i1_broadcast_63_v16i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001271; AVX512: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001272; AVX512-NEXT: kmovw 48(%rdi), %k0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001273; AVX512-NEXT: vpmovm2d %k0, %zmm0
1274; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1275; AVX512-NEXT: vpermd %zmm0, %zmm1, %zmm0
1276; AVX512-NEXT: vpmovd2m %zmm0, %k0
1277; AVX512-NEXT: kmovw %k0, (%rsi)
1278; AVX512-NEXT: vzeroupper
1279; AVX512-NEXT: retq
1280;
1281; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v16i1_store:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001282; AVX512NOTDQ: # %bb.0:
Sanjay Patel0a515592018-11-10 20:05:31 +00001283; AVX512NOTDQ-NEXT: kmovw 48(%rdi), %k1
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001284; AVX512NOTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1285; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1286; AVX512NOTDQ-NEXT: vpermd %zmm0, %zmm1, %zmm0
Michael Zuckerman0c20b692017-11-02 12:19:36 +00001287; AVX512NOTDQ-NEXT: vptestmd %zmm0, %zmm0, %k0
1288; AVX512NOTDQ-NEXT: kmovw %k0, (%rsi)
1289; AVX512NOTDQ-NEXT: vzeroupper
1290; AVX512NOTDQ-NEXT: retq
1291 %d0 = load <64 x i1>, <64 x i1>* %a0
1292 %d1 = shufflevector <64 x i1> %d0,<64 x i1> undef,<16 x i32><i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63,i32 63>
1293 store <16 x i1> %d1, <16 x i1>* %a1
1294 ret void
1295}
1296