; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F

; Sum of absolute differences over 8 bytes, accumulated in i32 lanes.
; The absolute value is spelled select(d > -1, d, 0 - d), which for integers
; is the "d >= 0" (sge) form of the comparison. The assertions below expect
; the whole zext/sub/select/reduction chain to become a single (v)psadbw on
; each tested subtarget.
define i32 @sad8_32bit_icmp_sge(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #0 {
; SSE2-LABEL: sad8_32bit_icmp_sge:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sad8_32bit_icmp_sge:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: sad8_32bit_icmp_sge:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovd %xmm0, %eax
; AVX512F-NEXT:    retq

entry:
  ; %idx.ext is never referenced again in this function.
  %idx.ext = zext i32 %stride to i64
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and widen every byte to i32.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i32>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i32>
  ; abs(cur - ref): keep the difference when it is > -1, else negate it.
  %6 = sub nsw <8 x i32> %2, %5
  %7 = icmp sgt <8 x i32> %6, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %8 = sub nsw <8 x i32> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %8
  ; Horizontal add: fold lanes 4-7, then 2-3, then 1 into lane 0.
  %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i32> %9, %rdx.shuf
  %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
  %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
  %10 = extractelement <8 x i32> %bin.rdx232, i32 0
  ret i32 %10
}

; Sum of absolute differences over 8 bytes, accumulated in i32 lanes.
; The absolute value is spelled select(d > 0, d, 0 - d), the strict
; "greater than zero" (sgt) form. The assertions below expect the same single
; (v)psadbw sequence on each tested subtarget.
define i32 @sad8_32bit_icmp_sgt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #1 {
; SSE2-LABEL: sad8_32bit_icmp_sgt:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sad8_32bit_icmp_sgt:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: sad8_32bit_icmp_sgt:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovd %xmm0, %eax
; AVX512F-NEXT:    retq
entry:
  ; %idx.ext is never referenced again in this function.
  %idx.ext = zext i32 %stride to i64
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and widen every byte to i32.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i32>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i32>
  ; abs(cur - ref): keep the difference when it is > 0, else negate it
  ; (negating a zero difference still yields zero).
  %6 = sub nsw <8 x i32> %2, %5
  %7 = icmp sgt <8 x i32> %6, zeroinitializer
  %8 = sub nsw <8 x i32> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %8
  ; Horizontal add: fold lanes 4-7, then 2-3, then 1 into lane 0.
  %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i32> %9, %rdx.shuf
  %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
  %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
  %10 = extractelement <8 x i32> %bin.rdx232, i32 0
  ret i32 %10
}

; Sum of absolute differences over 8 bytes, accumulated in i32 lanes.
; The absolute value is spelled select(d < 1, 0 - d, d), which for integers
; is the "d <= 0" (sle) form; note the select operands are swapped relative
; to the sgt-style variants. The assertions below expect the same single
; (v)psadbw sequence on each tested subtarget.
define i32 @sad8_32bit_icmp_sle(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #2 {
; SSE2-LABEL: sad8_32bit_icmp_sle:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sad8_32bit_icmp_sle:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: sad8_32bit_icmp_sle:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovd %xmm0, %eax
; AVX512F-NEXT:    retq
entry:
  ; %idx.ext is never referenced again in this function.
  %idx.ext = zext i32 %stride to i64
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and widen every byte to i32.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i32>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i32>
  ; abs(cur - ref): negate the difference when it is < 1, else keep it.
  %6 = sub nsw <8 x i32> %2, %5
  %7 = icmp slt <8 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %8 = sub nsw <8 x i32> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
  ; Horizontal add: fold lanes 4-7, then 2-3, then 1 into lane 0.
  %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i32> %9, %rdx.shuf
  %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
  %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
  %10 = extractelement <8 x i32> %bin.rdx232, i32 0
  ret i32 %10
}

; Sum of absolute differences over 8 bytes, accumulated in i32 lanes.
; The absolute value is spelled select(d < 0, 0 - d, d), the strict
; "less than zero" (slt) form. The assertions below expect the same single
; (v)psadbw sequence on each tested subtarget.
define i32 @sad8_32bit_icmp_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #3 {
; SSE2-LABEL: sad8_32bit_icmp_slt:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sad8_32bit_icmp_slt:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: sad8_32bit_icmp_slt:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovd %xmm0, %eax
; AVX512F-NEXT:    retq
entry:
  ; %idx.ext is never referenced again in this function.
  %idx.ext = zext i32 %stride to i64
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and widen every byte to i32.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i32>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i32>
  ; abs(cur - ref): negate the difference when it is < 0, else keep it.
  %6 = sub nsw <8 x i32> %2, %5
  %7 = icmp slt <8 x i32> %6, zeroinitializer
  %8 = sub nsw <8 x i32> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
  ; Horizontal add: fold lanes 4-7, then 2-3, then 1 into lane 0.
  %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i32> %9, %rdx.shuf
  %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
  %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
  %10 = extractelement <8 x i32> %bin.rdx232, i32 0
  ret i32 %10
}

; Sum of absolute differences over 8 bytes where the absolute differences are
; computed in i32 lanes (slt form) and then sign-extended to i64 before the
; horizontal reduction. The assertions below expect a single (v)psadbw whose
; low 64 bits are returned via movq/vmovq on each tested subtarget.
define i64 @sad8_64bit_icmp_sext_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 {
; SSE2-LABEL: sad8_64bit_icmp_sext_slt:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sad8_64bit_icmp_sext_slt:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: sad8_64bit_icmp_sext_slt:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    retq
entry:
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and widen every byte to i32.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i32>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i32>
  ; abs(cur - ref): negate the difference when it is < 0, else keep it.
  %6 = sub nsw <8 x i32> %2, %5
  %7 = icmp slt <8 x i32> %6, zeroinitializer
  %8 = sub nsw <8 x i32> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
  ; Widen to i64 by sign extension; each lane holds a value in 0..255 here,
  ; so this yields the same result as a zero extension would.
  %10 = sext <8 x i32> %9 to <8 x i64>
  ; Horizontal add: fold lanes 4-7, then 2-3, then 1 into lane 0.
  %rdx.shuf = shufflevector <8 x i64> %10, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i64> %rdx.shuf, %10
  %rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236
  %rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238
  %11 = extractelement <8 x i64> %bin.rdx239, i32 0
  ret i64 %11
}

; Sum of absolute differences over 8 bytes where the absolute differences are
; computed in i32 lanes (slt form) and then zero-extended to i64 before the
; horizontal reduction. The assertions below expect a single (v)psadbw whose
; low 64 bits are returned via movq/vmovq on each tested subtarget.
define i64 @sad8_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 {
; SSE2-LABEL: sad8_64bit_icmp_zext_slt:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sad8_64bit_icmp_zext_slt:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: sad8_64bit_icmp_zext_slt:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    retq
entry:
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and widen every byte to i32.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i32>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i32>
  ; abs(cur - ref): negate the difference when it is < 0, else keep it.
  %6 = sub nsw <8 x i32> %2, %5
  %7 = icmp slt <8 x i32> %6, zeroinitializer
  %8 = sub nsw <8 x i32> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
  ; Widen the absolute differences to i64 by zero extension.
  %10 = zext <8 x i32> %9 to <8 x i64>
  ; Horizontal add: fold lanes 4-7, then 2-3, then 1 into lane 0.
  %rdx.shuf = shufflevector <8 x i64> %10, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i64> %rdx.shuf, %10
  %rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236
  %rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238
  %11 = extractelement <8 x i64> %bin.rdx239, i32 0
  ret i64 %11
}

; Sum of absolute differences over 8 bytes where the bytes are zero-extended
; straight to i64 ("early" widening), so the subtract, compare, select and
; reduction all operate on <8 x i64>. The assertions below expect a single
; (v)psadbw whose low 64 bits are returned via movq/vmovq on each tested
; subtarget.
define i64 @sad8_early_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 {
; SSE2-LABEL: sad8_early_64bit_icmp_zext_slt:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sad8_early_64bit_icmp_zext_slt:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: sad8_early_64bit_icmp_zext_slt:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    retq
entry:
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and widen every byte directly to i64.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i64>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i64>
  ; abs(cur - ref): negate the difference when it is < 0, else keep it.
  %6 = sub nsw <8 x i64> %2, %5
  %7 = icmp slt <8 x i64> %6, zeroinitializer
  %8 = sub nsw <8 x i64> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i64> %8, <8 x i64> %6
  ; Horizontal add: fold lanes 4-7, then 2-3, then 1 into lane 0.
  %rdx.shuf = shufflevector <8 x i64> %9, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i64> %rdx.shuf, %9
  %rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236
  %rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238
  %10 = extractelement <8 x i64> %bin.rdx239, i32 0
  ret i64 %10
}