; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BWVL

; Rounding byte average ((a + b + 1) >> 1) merged into %src under a 16-bit lane mask.
define <16 x i8> @avg_v16i8_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %src, i16 %mask) nounwind {
; AVX512F-LABEL: avg_v16i8_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpblendvb %xmm1, %xmm0, %xmm2, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v16i8_mask:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1}
; AVX512BWVL-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512BWVL-NEXT:    retq
  %za = zext <16 x i8> %a to <16 x i16>
  %zb = zext <16 x i8> %b to <16 x i16>
  %add = add nuw nsw <16 x i16> %za, %zb
  %add1 = add nuw nsw <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <16 x i16> %lshr to <16 x i8>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask1, <16 x i8> %trunc, <16 x i8> %src
  ret <16 x i8> %res
}

; Zero-masked variant: lanes not set in %mask become zero instead of %src.
define <16 x i8> @avg_v16i8_maskz(<16 x i8> %a, <16 x i8> %b, i16 %mask) nounwind {
; AVX512F-LABEL: avg_v16i8_maskz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v16i8_maskz:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 {%k1} {z}
; AVX512BWVL-NEXT:    retq
  %za = zext <16 x i8> %a to <16 x i16>
  %zb = zext <16 x i8> %b to <16 x i16>
  %add = add nuw nsw <16 x i16> %za, %zb
  %add1 = add nuw nsw <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <16 x i16> %lshr to <16 x i8>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask1, <16 x i8> %trunc, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

; 32-byte merge-masked average; AVX512F lowers the i32 mask via two kmovw loads from the stack.
define <32 x i8> @avg_v32i8_mask(<32 x i8> %a, <32 x i8> %b, <32 x i8> %src, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i8_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $32, %rsp
; AVX512F-NEXT:    movl %edi, (%rsp)
; AVX512F-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    kmovw (%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v32i8_mask:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1}
; AVX512BWVL-NEXT:    vmovdqa %ymm2, %ymm0
; AVX512BWVL-NEXT:    retq
  %za = zext <32 x i8> %a to <32 x i16>
  %zb = zext <32 x i8> %b to <32 x i16>
  %add = add nuw nsw <32 x i16> %za, %zb
  %add1 = add nuw nsw <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <32 x i16> %lshr to <32 x i8>
  %mask1 = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %mask1, <32 x i8> %trunc, <32 x i8> %src
  ret <32 x i8> %res
}

; 32-byte zero-masked average; AVX512F materializes the mask as a vector and ANDs.
define <32 x i8> @avg_v32i8_maskz(<32 x i8> %a, <32 x i8> %b, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i8_maskz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $32, %rsp
; AVX512F-NEXT:    movl %edi, (%rsp)
; AVX512F-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    kmovw (%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v32i8_maskz:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgb %ymm1, %ymm0, %ymm0 {%k1} {z}
; AVX512BWVL-NEXT:    retq
  %za = zext <32 x i8> %a to <32 x i16>
  %zb = zext <32 x i8> %b to <32 x i16>
  %add = add nuw nsw <32 x i16> %za, %zb
  %add1 = add nuw nsw <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <32 x i16> %lshr to <32 x i8>
  %mask1 = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %mask1, <32 x i8> %trunc, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

; 64-byte merge-masked average; without AVX512BW the i64 mask is split into four 16-bit k-registers.
define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64 %mask) nounwind {
; AVX512F-LABEL: avg_v64i8_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $64, %rsp
; AVX512F-NEXT:    movq %rdi, %rax
; AVX512F-NEXT:    shrq $32, %rax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movl %edi, (%rsp)
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm6
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm8
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm7
; AVX512F-NEXT:    vpavgb %xmm7, %xmm6, %xmm6
; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm7
; AVX512F-NEXT:    vpavgb %xmm7, %xmm8, %xmm7
; AVX512F-NEXT:    vpavgb %xmm3, %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm7, %ymm1, %ymm1
; AVX512F-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm0, %ymm0
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
; AVX512F-NEXT:    kmovw (%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v64i8_mask:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovq %rdi, %k1
; AVX512BWVL-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BWVL-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512BWVL-NEXT:    retq
  %za = zext <64 x i8> %a to <64 x i16>
  %zb = zext <64 x i8> %b to <64 x i16>
  %add = add nuw nsw <64 x i16> %za, %zb
  %add1 = add nuw nsw <64 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <64 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <64 x i16> %lshr to <64 x i8>
  %mask1 = bitcast i64 %mask to <64 x i1>
  %res = select <64 x i1> %mask1, <64 x i8> %trunc, <64 x i8> %src
  ret <64 x i8> %res
}

; 64-byte zero-masked average; AVX512F path ANDs the widened mask with the vpavgb result.
define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind {
; AVX512F-LABEL: avg_v64i8_maskz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $64, %rsp
; AVX512F-NEXT:    movq %rdi, %rax
; AVX512F-NEXT:    shrq $32, %rax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movl %edi, (%rsp)
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm4
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm5
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm6
; AVX512F-NEXT:    vpavgb %xmm6, %xmm4, %xmm4
; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm6
; AVX512F-NEXT:    vpavgb %xmm6, %xmm5, %xmm5
; AVX512F-NEXT:    vpavgb %xmm3, %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm5, %ymm1, %ymm1
; AVX512F-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm0, %ymm0
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX512F-NEXT:    kmovw (%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v64i8_maskz:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovq %rdi, %k1
; AVX512BWVL-NEXT:    vpavgb %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BWVL-NEXT:    retq
  %za = zext <64 x i8> %a to <64 x i16>
  %zb = zext <64 x i8> %b to <64 x i16>
  %add = add nuw nsw <64 x i16> %za, %zb
  %add1 = add nuw nsw <64 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <64 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <64 x i16> %lshr to <64 x i8>
  %mask1 = bitcast i64 %mask to <64 x i1>
  %res = select <64 x i1> %mask1, <64 x i8> %trunc, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

; Rounding word average ((a + b + 1) >> 1) merged into %src under an 8-bit lane mask.
define <8 x i16> @avg_v8i16_mask(<8 x i16> %a, <8 x i16> %b, <8 x i16> %src, i8 %mask) nounwind {
; AVX512F-LABEL: avg_v8i16_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovqw %zmm1, %xmm1
; AVX512F-NEXT:    vpblendvb %xmm1, %xmm0, %xmm2, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v8i16_mask:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1}
; AVX512BWVL-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512BWVL-NEXT:    retq
  %za = zext <8 x i16> %a to <8 x i32>
  %zb = zext <8 x i16> %b to <8 x i32>
  %add = add nuw nsw <8 x i32> %za, %zb
  %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <8 x i32> %lshr to <8 x i16>
  %mask1 = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask1, <8 x i16> %trunc, <8 x i16> %src
  ret <8 x i16> %res
}

; Zero-masked word average: unselected lanes become zero.
define <8 x i16> @avg_v8i16_maskz(<8 x i16> %a, <8 x i16> %b, i8 %mask) nounwind {
; AVX512F-LABEL: avg_v8i16_maskz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovqw %zmm1, %xmm1
; AVX512F-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v8i16_maskz:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 {%k1} {z}
; AVX512BWVL-NEXT:    retq
  %za = zext <8 x i16> %a to <8 x i32>
  %zb = zext <8 x i16> %b to <8 x i32>
  %add = add nuw nsw <8 x i32> %za, %zb
  %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <8 x i32> %lshr to <8 x i16>
  %mask1 = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask1, <8 x i16> %trunc, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

; 16-word merge-masked average; AVX512F widens the k-mask through vpmovdw.
define <16 x i16> @avg_v16i16_mask(<16 x i16> %a, <16 x i16> %b, <16 x i16> %src, i16 %mask) nounwind {
; AVX512F-LABEL: avg_v16i16_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v16i16_mask:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1}
; AVX512BWVL-NEXT:    vmovdqa %ymm2, %ymm0
; AVX512BWVL-NEXT:    retq
  %za = zext <16 x i16> %a to <16 x i32>
  %zb = zext <16 x i16> %b to <16 x i32>
  %add = add nuw nsw <16 x i32> %za, %zb
  %add1 = add nuw nsw <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <16 x i32> %lshr to <16 x i16>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask1, <16 x i16> %trunc, <16 x i16> %src
  ret <16 x i16> %res
}

; 16-word zero-masked average.
define <16 x i16> @avg_v16i16_maskz(<16 x i16> %a, <16 x i16> %b, i16 %mask) nounwind {
; AVX512F-LABEL: avg_v16i16_maskz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v16i16_maskz:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgw %ymm1, %ymm0, %ymm0 {%k1} {z}
; AVX512BWVL-NEXT:    retq
  %za = zext <16 x i16> %a to <16 x i32>
  %zb = zext <16 x i16> %b to <16 x i32>
  %add = add nuw nsw <16 x i32> %za, %zb
  %add1 = add nuw nsw <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <16 x i32> %lshr to <16 x i16>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask1, <16 x i16> %trunc, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

; 32-word merge-masked average; AVX512F splits into two ymm halves and rebuilds each
; half-mask via vpmovzxbw + vpsllw/vpsraw sign-splat before vpblendvb.
define <32 x i16> @avg_v32i16_mask(<32 x i16> %a, <32 x i16> %b, <32 x i16> %src, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i16_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $32, %rsp
; AVX512F-NEXT:    movl %edi, (%rsp)
; AVX512F-NEXT:    kmovw (%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm6, %xmm6
; AVX512F-NEXT:    vpternlogd $255, %zmm7, %zmm7, %zmm7 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm7, %xmm7
; AVX512F-NEXT:    vpavgw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpavgw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero
; AVX512F-NEXT:    vpsllw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpsraw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
; AVX512F-NEXT:    vpsllw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpsraw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v32i16_mask:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BWVL-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512BWVL-NEXT:    retq
  %za = zext <32 x i16> %a to <32 x i32>
  %zb = zext <32 x i16> %b to <32 x i32>
  %add = add nuw nsw <32 x i32> %za, %zb
  %add1 = add nuw nsw <32 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <32 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <32 x i32> %lshr to <32 x i16>
  %mask1 = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %mask1, <32 x i16> %trunc, <32 x i16> %src
  ret <32 x i16> %res
}

; 32-word zero-masked average; same AVX512F half-splitting, with vpand instead of blend.
define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i16_maskz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $32, %rsp
; AVX512F-NEXT:    movl %edi, (%rsp)
; AVX512F-NEXT:    kmovw (%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm4, %xmm4
; AVX512F-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm5, %xmm5
; AVX512F-NEXT:    vpavgw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpavgw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
; AVX512F-NEXT:    vpsllw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpsraw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; AVX512F-NEXT:    vpsllw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpsraw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v32i16_maskz:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BWVL-NEXT:    retq
  %za = zext <32 x i16> %a to <32 x i32>
  %zb = zext <32 x i16> %b to <32 x i32>
  %add = add nuw nsw <32 x i32> %za, %zb
  %add1 = add nuw nsw <32 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <32 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <32 x i32> %lshr to <32 x i16>
  %mask1 = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %mask1, <32 x i16> %trunc, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}