; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BWVL

; Masked byte-average: (zext -> add -> +1 -> lshr 1 -> trunc) selects vpavgb,
; then the i16 bitmask selects between the average and %src.
define <16 x i8> @avg_v16i8_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %src, i16 %mask) nounwind {
; AVX512F-LABEL: avg_v16i8_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpblendvb %xmm1, %xmm0, %xmm2, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v16i8_mask:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1}
; AVX512BWVL-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512BWVL-NEXT:    retq
  %za = zext <16 x i8> %a to <16 x i16>
  %zb = zext <16 x i8> %b to <16 x i16>
  %add = add nuw nsw <16 x i16> %za, %zb
  %add1 = add nuw nsw <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <16 x i16> %lshr to <16 x i8>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask1, <16 x i8> %trunc, <16 x i8> %src
  ret <16 x i8> %res
}

; Zero-masked variant of avg_v16i8_mask: masked-off lanes become zero.
define <16 x i8> @avg_v16i8_maskz(<16 x i8> %a, <16 x i8> %b, i16 %mask) nounwind {
; AVX512F-LABEL: avg_v16i8_maskz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v16i8_maskz:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 {%k1} {z}
; AVX512BWVL-NEXT:    retq
  %za = zext <16 x i8> %a to <16 x i16>
  %zb = zext <16 x i8> %b to <16 x i16>
  %add = add nuw nsw <16 x i16> %za, %zb
  %add1 = add nuw nsw <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <16 x i16> %lshr to <16 x i8>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask1, <16 x i8> %trunc, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

; 256-bit masked byte-average; AVX512F must round-trip the i32 mask through
; the stack to build two 16-bit k-registers.
define <32 x i8> @avg_v32i8_mask(<32 x i8> %a, <32 x i8> %b, <32 x i8> %src, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i8_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $32, %rsp
; AVX512F-NEXT:    movl %edi, (%rsp)
; AVX512F-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    kmovw (%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v32i8_mask:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1}
; AVX512BWVL-NEXT:    vmovdqa %ymm2, %ymm0
; AVX512BWVL-NEXT:    retq
  %za = zext <32 x i8> %a to <32 x i16>
  %zb = zext <32 x i8> %b to <32 x i16>
  %add = add nuw nsw <32 x i16> %za, %zb
  %add1 = add nuw nsw <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <32 x i16> %lshr to <32 x i8>
  %mask1 = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %mask1, <32 x i8> %trunc, <32 x i8> %src
  ret <32 x i8> %res
}

; Zero-masked variant of avg_v32i8_mask.
define <32 x i8> @avg_v32i8_maskz(<32 x i8> %a, <32 x i8> %b, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i8_maskz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $32, %rsp
; AVX512F-NEXT:    movl %edi, (%rsp)
; AVX512F-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    kmovw (%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v32i8_maskz:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgb %ymm1, %ymm0, %ymm0 {%k1} {z}
; AVX512BWVL-NEXT:    retq
  %za = zext <32 x i8> %a to <32 x i16>
  %zb = zext <32 x i8> %b to <32 x i16>
  %add = add nuw nsw <32 x i16> %za, %zb
  %add1 = add nuw nsw <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <32 x i16> %lshr to <32 x i8>
  %mask1 = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %mask1, <32 x i8> %trunc, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

; 512-bit masked byte-average; AVX512F splits the i64 mask into four 16-bit
; k-registers via the stack and blends two 256-bit halves.
define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64 %mask) nounwind {
; AVX512F-LABEL: avg_v64i8_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $64, %rsp
; AVX512F-NEXT:    movl %edi, (%rsp)
; AVX512F-NEXT:    shrq $32, %rdi
; AVX512F-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
; AVX512F-NEXT:    vpavgb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpavgb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    kmovw (%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v64i8_mask:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovq %rdi, %k1
; AVX512BWVL-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BWVL-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512BWVL-NEXT:    retq
  %za = zext <64 x i8> %a to <64 x i16>
  %zb = zext <64 x i8> %b to <64 x i16>
  %add = add nuw nsw <64 x i16> %za, %zb
  %add1 = add nuw nsw <64 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <64 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <64 x i16> %lshr to <64 x i8>
  %mask1 = bitcast i64 %mask to <64 x i1>
  %res = select <64 x i1> %mask1, <64 x i8> %trunc, <64 x i8> %src
  ret <64 x i8> %res
}

; Zero-masked variant of avg_v64i8_mask (vpand instead of vpblendvb).
define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind {
; AVX512F-LABEL: avg_v64i8_maskz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $64, %rsp
; AVX512F-NEXT:    movl %edi, (%rsp)
; AVX512F-NEXT:    shrq $32, %rdi
; AVX512F-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
; AVX512F-NEXT:    vpavgb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpavgb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    kmovw (%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v64i8_maskz:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovq %rdi, %k1
; AVX512BWVL-NEXT:    vpavgb %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BWVL-NEXT:    retq
  %za = zext <64 x i8> %a to <64 x i16>
  %zb = zext <64 x i8> %b to <64 x i16>
  %add = add nuw nsw <64 x i16> %za, %zb
  %add1 = add nuw nsw <64 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <64 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <64 x i16> %lshr to <64 x i8>
  %mask1 = bitcast i64 %mask to <64 x i1>
  %res = select <64 x i1> %mask1, <64 x i8> %trunc, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

; Masked word-average selecting vpavgw; i8 mask selects result vs. %src.
define <8 x i16> @avg_v8i16_mask(<8 x i16> %a, <8 x i16> %b, <8 x i16> %src, i8 %mask) nounwind {
; AVX512F-LABEL: avg_v8i16_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpblendvb %xmm1, %xmm0, %xmm2, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v8i16_mask:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1}
; AVX512BWVL-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512BWVL-NEXT:    retq
  %za = zext <8 x i16> %a to <8 x i32>
  %zb = zext <8 x i16> %b to <8 x i32>
  %add = add nuw nsw <8 x i32> %za, %zb
  %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <8 x i32> %lshr to <8 x i16>
  %mask1 = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask1, <8 x i16> %trunc, <8 x i16> %src
  ret <8 x i16> %res
}

; Zero-masked variant of avg_v8i16_mask.
define <8 x i16> @avg_v8i16_maskz(<8 x i16> %a, <8 x i16> %b, i8 %mask) nounwind {
; AVX512F-LABEL: avg_v8i16_maskz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v8i16_maskz:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 {%k1} {z}
; AVX512BWVL-NEXT:    retq
  %za = zext <8 x i16> %a to <8 x i32>
  %zb = zext <8 x i16> %b to <8 x i32>
  %add = add nuw nsw <8 x i32> %za, %zb
  %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <8 x i32> %lshr to <8 x i16>
  %mask1 = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask1, <8 x i16> %trunc, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

; 256-bit masked word-average; i16 mask selects result vs. %src.
define <16 x i16> @avg_v16i16_mask(<16 x i16> %a, <16 x i16> %b, <16 x i16> %src, i16 %mask) nounwind {
; AVX512F-LABEL: avg_v16i16_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v16i16_mask:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1}
; AVX512BWVL-NEXT:    vmovdqa %ymm2, %ymm0
; AVX512BWVL-NEXT:    retq
  %za = zext <16 x i16> %a to <16 x i32>
  %zb = zext <16 x i16> %b to <16 x i32>
  %add = add nuw nsw <16 x i32> %za, %zb
  %add1 = add nuw nsw <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <16 x i32> %lshr to <16 x i16>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask1, <16 x i16> %trunc, <16 x i16> %src
  ret <16 x i16> %res
}

; Zero-masked variant of avg_v16i16_mask.
define <16 x i16> @avg_v16i16_maskz(<16 x i16> %a, <16 x i16> %b, i16 %mask) nounwind {
; AVX512F-LABEL: avg_v16i16_maskz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v16i16_maskz:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgw %ymm1, %ymm0, %ymm0 {%k1} {z}
; AVX512BWVL-NEXT:    retq
  %za = zext <16 x i16> %a to <16 x i32>
  %zb = zext <16 x i16> %b to <16 x i32>
  %add = add nuw nsw <16 x i32> %za, %zb
  %add1 = add nuw nsw <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <16 x i32> %lshr to <16 x i16>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask1, <16 x i16> %trunc, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

; 512-bit masked word-average; AVX512F expands the two k-register halves to
; byte vectors, re-widens with vpmovzxbw and sign-fills via psllw/psraw to
; form the vpblendvb select masks.
define <32 x i16> @avg_v32i16_mask(<32 x i16> %a, <32 x i16> %b, <32 x i16> %src, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i16_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $32, %rsp
; AVX512F-NEXT:    movl %edi, (%rsp)
; AVX512F-NEXT:    kmovw (%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm6, %xmm6
; AVX512F-NEXT:    vpternlogd $255, %zmm7, %zmm7, %zmm7 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm7, %xmm7
; AVX512F-NEXT:    vpavgw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpavgw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero
; AVX512F-NEXT:    vpsllw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpsraw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
; AVX512F-NEXT:    vpsllw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpsraw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v32i16_mask:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BWVL-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512BWVL-NEXT:    retq
  %za = zext <32 x i16> %a to <32 x i32>
  %zb = zext <32 x i16> %b to <32 x i32>
  %add = add nuw nsw <32 x i32> %za, %zb
  %add1 = add nuw nsw <32 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <32 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <32 x i32> %lshr to <32 x i16>
  %mask1 = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %mask1, <32 x i16> %trunc, <32 x i16> %src
  ret <32 x i16> %res
}

; Zero-masked variant of avg_v32i16_mask (vpand instead of vpblendvb).
define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i16_maskz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $32, %rsp
; AVX512F-NEXT:    movl %edi, (%rsp)
; AVX512F-NEXT:    kmovw (%rsp), %k1
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm4, %xmm4
; AVX512F-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm5, %xmm5
; AVX512F-NEXT:    vpavgw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpavgw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
; AVX512F-NEXT:    vpsllw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpsraw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; AVX512F-NEXT:    vpsllw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpsraw $15, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BWVL-LABEL: avg_v32i16_maskz:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    kmovd %edi, %k1
; AVX512BWVL-NEXT:    vpavgw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BWVL-NEXT:    retq
  %za = zext <32 x i16> %a to <32 x i32>
  %zb = zext <32 x i16> %b to <32 x i32>
  %add = add nuw nsw <32 x i32> %za, %zb
  %add1 = add nuw nsw <32 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <32 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <32 x i32> %lshr to <32 x i16>
  %mask1 = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %mask1, <32 x i16> %trunc, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}