; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BWVL

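; Masked <16 x i8> rounding average merging into %src. The IR computes
; (zext(a) + zext(b) + 1) >> 1 and selects on the i16 mask, which should fold
; into a single masked vpavgb on AVX512BWVL; plain AVX512F has no byte/word
; masking, so it expands the mask via vpternlogd/vpmovdb and blends with vpblendvb.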
define <16 x i8> @avg_v16i8_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %src, i16 %mask) nounwind {
; AVX512F-LABEL: avg_v16i8_mask:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpavgb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v16i8_mask:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: kmovd %edi, %k1
; AVX512BWVL-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1}
; AVX512BWVL-NEXT: vmovdqa %xmm2, %xmm0
; AVX512BWVL-NEXT: retq
  %za = zext <16 x i8> %a to <16 x i16>
  %zb = zext <16 x i8> %b to <16 x i16>
  %add = add nuw nsw <16 x i16> %za, %zb
  %add1 = add nuw nsw <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <16 x i16> %lshr to <16 x i8>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask1, <16 x i8> %trunc, <16 x i8> %src
  ret <16 x i8> %res
}

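; Zero-masking variant of the test above: AVX512BWVL uses vpavgb with {%k1} {z},
; and AVX512F zeroes the inactive lanes with vpand instead of a blend.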
define <16 x i8> @avg_v16i8_maskz(<16 x i8> %a, <16 x i8> %b, i16 %mask) nounwind {
; AVX512F-LABEL: avg_v16i8_maskz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpavgb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v16i8_maskz:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: kmovd %edi, %k1
; AVX512BWVL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 {%k1} {z}
; AVX512BWVL-NEXT: retq
  %za = zext <16 x i8> %a to <16 x i16>
  %zb = zext <16 x i8> %b to <16 x i16>
  %add = add nuw nsw <16 x i16> %za, %zb
  %add1 = add nuw nsw <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <16 x i16> %lshr to <16 x i8>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask1, <16 x i8> %trunc, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

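; Masked <32 x i8> average merging into %src. Without AVX512BW the i32 mask has
; to be split: AVX512F spills it to the stack, loads two 16-bit halves into
; k-registers, widens each through vpternlogd/vpmovdb, and blends with vpblendvb;
; AVX512BWVL keeps a single masked vpavgb.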
define <32 x i8> @avg_v32i8_mask(<32 x i8> %a, <32 x i8> %b, <32 x i8> %src, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i8_mask:
; AVX512F: # %bb.0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: movl %edi, (%rsp)
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: kmovw (%rsp), %k1
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v32i8_mask:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: kmovd %edi, %k1
; AVX512BWVL-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1}
; AVX512BWVL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512BWVL-NEXT: retq
  %za = zext <32 x i8> %a to <32 x i16>
  %zb = zext <32 x i8> %b to <32 x i16>
  %add = add nuw nsw <32 x i16> %za, %zb
  %add1 = add nuw nsw <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <32 x i16> %lshr to <32 x i8>
  %mask1 = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %mask1, <32 x i8> %trunc, <32 x i8> %src
  ret <32 x i8> %res
}

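; Zero-masking <32 x i8> variant; the reconstructed vector mask is applied with
; vpand on AVX512F.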
define <32 x i8> @avg_v32i8_maskz(<32 x i8> %a, <32 x i8> %b, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i8_maskz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: movl %edi, (%rsp)
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: kmovw (%rsp), %k1
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v32i8_maskz:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: kmovd %edi, %k1
; AVX512BWVL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 {%k1} {z}
; AVX512BWVL-NEXT: retq
  %za = zext <32 x i8> %a to <32 x i16>
  %zb = zext <32 x i8> %b to <32 x i16>
  %add = add nuw nsw <32 x i16> %za, %zb
  %add1 = add nuw nsw <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <32 x i16> %lshr to <32 x i8>
  %mask1 = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %mask1, <32 x i8> %trunc, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

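; Masked <64 x i8> average merging into %src. AVX512F averages two 256-bit
; halves and rebuilds the i64 mask from four 16-bit pieces spilled to the stack;
; AVX512BWVL uses one 512-bit masked vpavgb.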
define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64 %mask) nounwind {
; AVX512F-LABEL: avg_v64i8_mask:
; AVX512F: # %bb.0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $64, %rsp
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: shrq $32, %rax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: movl %edi, (%rsp)
; AVX512F-NEXT: vpavgb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpavgb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
; AVX512F-NEXT: kmovw (%rsp), %k1
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v64i8_mask:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: kmovq %rdi, %k1
; AVX512BWVL-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BWVL-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BWVL-NEXT: retq
  %za = zext <64 x i8> %a to <64 x i16>
  %zb = zext <64 x i8> %b to <64 x i16>
  %add = add nuw nsw <64 x i16> %za, %zb
  %add1 = add nuw nsw <64 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <64 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <64 x i16> %lshr to <64 x i8>
  %mask1 = bitcast i64 %mask to <64 x i1>
  %res = select <64 x i1> %mask1, <64 x i8> %trunc, <64 x i8> %src
  ret <64 x i8> %res
}

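; Zero-masking <64 x i8> variant; AVX512F applies the rebuilt mask halves with vpand.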
define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind {
; AVX512F-LABEL: avg_v64i8_maskz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $64, %rsp
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: shrq $32, %rax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: movl %edi, (%rsp)
; AVX512F-NEXT: vpavgb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpavgb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX512F-NEXT: kmovw (%rsp), %k1
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v64i8_maskz:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: kmovq %rdi, %k1
; AVX512BWVL-NEXT: vpavgb %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BWVL-NEXT: retq
  %za = zext <64 x i8> %a to <64 x i16>
  %zb = zext <64 x i8> %b to <64 x i16>
  %add = add nuw nsw <64 x i16> %za, %zb
  %add1 = add nuw nsw <64 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <64 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %trunc = trunc <64 x i16> %lshr to <64 x i8>
  %mask1 = bitcast i64 %mask to <64 x i1>
  %res = select <64 x i1> %mask1, <64 x i8> %trunc, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

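; Masked <8 x i16> rounding average merging into %src: a single masked vpavgw on
; AVX512BWVL, versus mask expansion through vpternlogd/vpmovdw plus vpblendvb on AVX512F.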
define <8 x i16> @avg_v8i16_mask(<8 x i16> %a, <8 x i16> %b, <8 x i16> %src, i8 %mask) nounwind {
; AVX512F-LABEL: avg_v8i16_mask:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpavgw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v8i16_mask:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: kmovd %edi, %k1
; AVX512BWVL-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1}
; AVX512BWVL-NEXT: vmovdqa %xmm2, %xmm0
; AVX512BWVL-NEXT: retq
  %za = zext <8 x i16> %a to <8 x i32>
  %zb = zext <8 x i16> %b to <8 x i32>
  %add = add nuw nsw <8 x i32> %za, %zb
  %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <8 x i32> %lshr to <8 x i16>
  %mask1 = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask1, <8 x i16> %trunc, <8 x i16> %src
  ret <8 x i16> %res
}

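; Zero-masking <8 x i16> variant; AVX512F masks the result with vpand.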
define <8 x i16> @avg_v8i16_maskz(<8 x i16> %a, <8 x i16> %b, i8 %mask) nounwind {
; AVX512F-LABEL: avg_v8i16_maskz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpavgw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v8i16_maskz:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: kmovd %edi, %k1
; AVX512BWVL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 {%k1} {z}
; AVX512BWVL-NEXT: retq
  %za = zext <8 x i16> %a to <8 x i32>
  %zb = zext <8 x i16> %b to <8 x i32>
  %add = add nuw nsw <8 x i32> %za, %zb
  %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <8 x i32> %lshr to <8 x i16>
  %mask1 = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask1, <8 x i16> %trunc, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

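; Masked <16 x i16> average merging into %src; the i16 mask fits one k-register,
; so AVX512F only needs vpternlogd/vpmovdw and a vpblendvb.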
define <16 x i16> @avg_v16i16_mask(<16 x i16> %a, <16 x i16> %b, <16 x i16> %src, i16 %mask) nounwind {
; AVX512F-LABEL: avg_v16i16_mask:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v16i16_mask:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: kmovd %edi, %k1
; AVX512BWVL-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1}
; AVX512BWVL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512BWVL-NEXT: retq
  %za = zext <16 x i16> %a to <16 x i32>
  %zb = zext <16 x i16> %b to <16 x i32>
  %add = add nuw nsw <16 x i32> %za, %zb
  %add1 = add nuw nsw <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <16 x i32> %lshr to <16 x i16>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask1, <16 x i16> %trunc, <16 x i16> %src
  ret <16 x i16> %res
}

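; Zero-masking <16 x i16> variant; AVX512F uses vpand.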
define <16 x i16> @avg_v16i16_maskz(<16 x i16> %a, <16 x i16> %b, i16 %mask) nounwind {
; AVX512F-LABEL: avg_v16i16_maskz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v16i16_maskz:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: kmovd %edi, %k1
; AVX512BWVL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 {%k1} {z}
; AVX512BWVL-NEXT: retq
  %za = zext <16 x i16> %a to <16 x i32>
  %zb = zext <16 x i16> %b to <16 x i32>
  %add = add nuw nsw <16 x i32> %za, %zb
  %add1 = add nuw nsw <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <16 x i32> %lshr to <16 x i16>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask1, <16 x i16> %trunc, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

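; Masked <32 x i16> average merging into %src. AVX512F averages two 256-bit
; halves and widens the mask bytes back to word lanes with vpmovzxbw/vpsllw/vpsraw
; before blending; AVX512BWVL uses one 512-bit masked vpavgw.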
define <32 x i16> @avg_v32i16_mask(<32 x i16> %a, <32 x i16> %b, <32 x i16> %src, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i16_mask:
; AVX512F: # %bb.0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: movl %edi, (%rsp)
; AVX512F-NEXT: kmovw (%rsp), %k1
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm6, %xmm6
; AVX512F-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm7, %xmm7
; AVX512F-NEXT: vpavgw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpavgw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero
; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2
; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2
; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v32i16_mask:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: kmovd %edi, %k1
; AVX512BWVL-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BWVL-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BWVL-NEXT: retq
  %za = zext <32 x i16> %a to <32 x i32>
  %zb = zext <32 x i16> %b to <32 x i32>
  %add = add nuw nsw <32 x i32> %za, %zb
  %add1 = add nuw nsw <32 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <32 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <32 x i32> %lshr to <32 x i16>
  %mask1 = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %mask1, <32 x i16> %trunc, <32 x i16> %src
  ret <32 x i16> %res
}

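; Zero-masking <32 x i16> variant; the widened mask is applied with vpand on each
; 256-bit half.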
define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind {
; AVX512F-LABEL: avg_v32i16_maskz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: movl %edi, (%rsp)
; AVX512F-NEXT: kmovw (%rsp), %k1
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm4, %xmm4
; AVX512F-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm5, %xmm5
; AVX512F-NEXT: vpavgw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpavgw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2
; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2
; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2
; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2
; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v32i16_maskz:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: kmovd %edi, %k1
; AVX512BWVL-NEXT: vpavgw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BWVL-NEXT: retq
  %za = zext <32 x i16> %a to <32 x i32>
  %zb = zext <32 x i16> %b to <32 x i32>
  %add = add nuw nsw <32 x i32> %za, %zb
  %add1 = add nuw nsw <32 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <32 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %trunc = trunc <32 x i32> %lshr to <32 x i16>
  %mask1 = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %mask1, <32 x i16> %trunc, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}
429}