blob: e689f4af176835a1981d6de70d8db4f36c61c044 [file] [log] [blame]
Simon Pilgrim829091e2015-08-13 20:31:03 +00001; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE42
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
8
9;
10; Unsigned Maximum (GT)
11;
12
; Unsigned max of two <2 x i64> vectors, written as icmp ugt + select.
; The assertions below expect no dedicated unsigned-max instruction for
; 64-bit lanes on any tested target. Both inputs are XORed with a sign-bit
; constant and compared with signed compares (a pcmpgtd/pcmpeqd/pshufd
; sequence on plain SSE2 and SSE4.1, pcmpgtq from SSE4.2 onwards); the
; resulting mask drives a pand/pandn/por or blendvpd select.
13define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
14; SSE2-LABEL: max_gt_v2i64:
15; SSE2: # BB#0:
16; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
17; SSE2-NEXT: movdqa %xmm1, %xmm3
18; SSE2-NEXT: pxor %xmm2, %xmm3
19; SSE2-NEXT: pxor %xmm0, %xmm2
20; SSE2-NEXT: movdqa %xmm2, %xmm4
21; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
22; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
23; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
24; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
25; SSE2-NEXT: pand %xmm5, %xmm2
26; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
27; SSE2-NEXT: por %xmm2, %xmm3
28; SSE2-NEXT: pand %xmm3, %xmm0
29; SSE2-NEXT: pandn %xmm1, %xmm3
30; SSE2-NEXT: por %xmm3, %xmm0
31; SSE2-NEXT: retq
32;
33; SSE41-LABEL: max_gt_v2i64:
34; SSE41: # BB#0:
35; SSE41-NEXT: movdqa %xmm0, %xmm2
36; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
37; SSE41-NEXT: movdqa %xmm1, %xmm3
38; SSE41-NEXT: pxor %xmm0, %xmm3
39; SSE41-NEXT: pxor %xmm2, %xmm0
40; SSE41-NEXT: movdqa %xmm0, %xmm4
41; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
42; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
43; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
44; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
45; SSE41-NEXT: pand %xmm5, %xmm3
46; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
47; SSE41-NEXT: por %xmm3, %xmm0
48; SSE41-NEXT: blendvpd %xmm2, %xmm1
49; SSE41-NEXT: movapd %xmm1, %xmm0
50; SSE41-NEXT: retq
51;
52; SSE42-LABEL: max_gt_v2i64:
53; SSE42: # BB#0:
54; SSE42-NEXT: movdqa %xmm0, %xmm2
55; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
56; SSE42-NEXT: movdqa %xmm1, %xmm3
57; SSE42-NEXT: pxor %xmm0, %xmm3
58; SSE42-NEXT: pxor %xmm2, %xmm0
59; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
60; SSE42-NEXT: blendvpd %xmm2, %xmm1
61; SSE42-NEXT: movapd %xmm1, %xmm0
62; SSE42-NEXT: retq
63;
64; AVX-LABEL: max_gt_v2i64:
65; AVX: # BB#0:
66; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
67; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3
68; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
69; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
70; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
71; AVX-NEXT: retq
; IR under test: %1 is the per-lane unsigned a > b mask; the select keeps %a
; where the mask is set and %b elsewhere.
72 %1 = icmp ugt <2 x i64> %a, %b
73 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
74 ret <2 x i64> %2
75}
76
; Unsigned max of two <4 x i64> vectors (icmp ugt + select).
; In the SSE check blocks the 256-bit operands appear as two 128-bit halves
; (xmm0/xmm1 for %a, xmm2/xmm3 for %b), each half using the sign-bit XOR plus
; signed-compare expansion. The AVX1 block splits with vextractf128, compares
; each half with vpcmpgtq and rejoins the masks with vinsertf128 before one
; ymm vblendvpd. The AVX2 and AVX512 blocks compare whole ymm registers.
77define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
78; SSE2-LABEL: max_gt_v4i64:
79; SSE2: # BB#0:
80; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
81; SSE2-NEXT: movdqa %xmm3, %xmm5
82; SSE2-NEXT: pxor %xmm4, %xmm5
83; SSE2-NEXT: movdqa %xmm1, %xmm6
84; SSE2-NEXT: pxor %xmm4, %xmm6
85; SSE2-NEXT: movdqa %xmm6, %xmm7
86; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
87; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
88; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
89; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
90; SSE2-NEXT: pand %xmm8, %xmm5
91; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
92; SSE2-NEXT: por %xmm5, %xmm6
93; SSE2-NEXT: movdqa %xmm2, %xmm5
94; SSE2-NEXT: pxor %xmm4, %xmm5
95; SSE2-NEXT: pxor %xmm0, %xmm4
96; SSE2-NEXT: movdqa %xmm4, %xmm7
97; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
98; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
99; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
100; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
101; SSE2-NEXT: pand %xmm8, %xmm4
102; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
103; SSE2-NEXT: por %xmm4, %xmm5
104; SSE2-NEXT: pand %xmm5, %xmm0
105; SSE2-NEXT: pandn %xmm2, %xmm5
106; SSE2-NEXT: por %xmm5, %xmm0
107; SSE2-NEXT: pand %xmm6, %xmm1
108; SSE2-NEXT: pandn %xmm3, %xmm6
109; SSE2-NEXT: por %xmm6, %xmm1
110; SSE2-NEXT: retq
111;
112; SSE41-LABEL: max_gt_v4i64:
113; SSE41: # BB#0:
114; SSE41-NEXT: movdqa %xmm0, %xmm8
115; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
116; SSE41-NEXT: movdqa %xmm3, %xmm5
117; SSE41-NEXT: pxor %xmm0, %xmm5
118; SSE41-NEXT: movdqa %xmm1, %xmm6
119; SSE41-NEXT: pxor %xmm0, %xmm6
120; SSE41-NEXT: movdqa %xmm6, %xmm7
121; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
122; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
123; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
124; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
125; SSE41-NEXT: pand %xmm4, %xmm6
126; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
127; SSE41-NEXT: por %xmm6, %xmm5
128; SSE41-NEXT: movdqa %xmm2, %xmm4
129; SSE41-NEXT: pxor %xmm0, %xmm4
130; SSE41-NEXT: pxor %xmm8, %xmm0
131; SSE41-NEXT: movdqa %xmm0, %xmm6
132; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
133; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
134; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
135; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
136; SSE41-NEXT: pand %xmm7, %xmm4
137; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
138; SSE41-NEXT: por %xmm4, %xmm0
139; SSE41-NEXT: blendvpd %xmm8, %xmm2
140; SSE41-NEXT: movdqa %xmm5, %xmm0
141; SSE41-NEXT: blendvpd %xmm1, %xmm3
142; SSE41-NEXT: movapd %xmm2, %xmm0
143; SSE41-NEXT: movapd %xmm3, %xmm1
144; SSE41-NEXT: retq
145;
146; SSE42-LABEL: max_gt_v4i64:
147; SSE42: # BB#0:
148; SSE42-NEXT: movdqa %xmm0, %xmm4
149; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
150; SSE42-NEXT: movdqa %xmm3, %xmm6
151; SSE42-NEXT: pxor %xmm0, %xmm6
152; SSE42-NEXT: movdqa %xmm1, %xmm5
153; SSE42-NEXT: pxor %xmm0, %xmm5
154; SSE42-NEXT: pcmpgtq %xmm6, %xmm5
155; SSE42-NEXT: movdqa %xmm2, %xmm6
156; SSE42-NEXT: pxor %xmm0, %xmm6
157; SSE42-NEXT: pxor %xmm4, %xmm0
158; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
159; SSE42-NEXT: blendvpd %xmm4, %xmm2
160; SSE42-NEXT: movdqa %xmm5, %xmm0
161; SSE42-NEXT: blendvpd %xmm1, %xmm3
162; SSE42-NEXT: movapd %xmm2, %xmm0
163; SSE42-NEXT: movapd %xmm3, %xmm1
164; SSE42-NEXT: retq
165;
166; AVX1-LABEL: max_gt_v4i64:
167; AVX1: # BB#0:
168; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
169; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
170; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
171; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
172; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
173; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
174; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm4
175; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
176; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
177; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
178; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
179; AVX1-NEXT: retq
180;
181; AVX2-LABEL: max_gt_v4i64:
182; AVX2: # BB#0:
183; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
184; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
185; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
186; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
187; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
188; AVX2-NEXT: retq
189;
190; AVX512-LABEL: max_gt_v4i64:
191; AVX512: # BB#0:
192; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
193; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm3
194; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm2
195; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
196; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
197; AVX512-NEXT: retq
; IR under test: %1 is the per-lane unsigned a > b mask; the select keeps %a
; where the mask is set and %b elsewhere.
198 %1 = icmp ugt <4 x i64> %a, %b
199 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
200 ret <4 x i64> %2
201}
202
; Unsigned max of two <4 x i32> vectors (icmp ugt + select).
; The assertions expect the pattern to fold to a single pmaxud (vpmaxud on
; AVX targets) everywhere except plain SSE2, where a sign-bit XOR, pcmpgtd and
; a pand/pandn/por select are expected instead.
203define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) {
204; SSE2-LABEL: max_gt_v4i32:
205; SSE2: # BB#0:
206; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
207; SSE2-NEXT: movdqa %xmm1, %xmm3
208; SSE2-NEXT: pxor %xmm2, %xmm3
209; SSE2-NEXT: pxor %xmm0, %xmm2
210; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
211; SSE2-NEXT: pand %xmm2, %xmm0
212; SSE2-NEXT: pandn %xmm1, %xmm2
213; SSE2-NEXT: por %xmm2, %xmm0
214; SSE2-NEXT: retq
215;
216; SSE41-LABEL: max_gt_v4i32:
217; SSE41: # BB#0:
218; SSE41-NEXT: pmaxud %xmm1, %xmm0
219; SSE41-NEXT: retq
220;
221; SSE42-LABEL: max_gt_v4i32:
222; SSE42: # BB#0:
223; SSE42-NEXT: pmaxud %xmm1, %xmm0
224; SSE42-NEXT: retq
225;
226; AVX-LABEL: max_gt_v4i32:
227; AVX: # BB#0:
228; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
229; AVX-NEXT: retq
; IR under test: %1 is the per-lane unsigned a > b mask; the select keeps %a
; where the mask is set and %b elsewhere.
230 %1 = icmp ugt <4 x i32> %a, %b
231 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
232 ret <4 x i32> %2
233}
234
; Unsigned max of two <8 x i32> vectors (icmp ugt + select).
; Expected lowering per the assertions: plain SSE2 expands each 128-bit half
; to sign-bit XOR + pcmpgtd + pand/pandn/por; SSE4.1 and SSE4.2 use one pmaxud
; per half; the AVX1 block uses two 128-bit vpmaxud ops between vextractf128
; and vinsertf128; the AVX2 and AVX512 blocks use a single ymm vpmaxud.
235define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) {
236; SSE2-LABEL: max_gt_v8i32:
237; SSE2: # BB#0:
238; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
239; SSE2-NEXT: movdqa %xmm3, %xmm6
240; SSE2-NEXT: pxor %xmm5, %xmm6
241; SSE2-NEXT: movdqa %xmm1, %xmm4
242; SSE2-NEXT: pxor %xmm5, %xmm4
243; SSE2-NEXT: pcmpgtd %xmm6, %xmm4
244; SSE2-NEXT: movdqa %xmm2, %xmm6
245; SSE2-NEXT: pxor %xmm5, %xmm6
246; SSE2-NEXT: pxor %xmm0, %xmm5
247; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
248; SSE2-NEXT: pand %xmm5, %xmm0
249; SSE2-NEXT: pandn %xmm2, %xmm5
250; SSE2-NEXT: por %xmm5, %xmm0
251; SSE2-NEXT: pand %xmm4, %xmm1
252; SSE2-NEXT: pandn %xmm3, %xmm4
253; SSE2-NEXT: por %xmm1, %xmm4
254; SSE2-NEXT: movdqa %xmm4, %xmm1
255; SSE2-NEXT: retq
256;
257; SSE41-LABEL: max_gt_v8i32:
258; SSE41: # BB#0:
259; SSE41-NEXT: pmaxud %xmm2, %xmm0
260; SSE41-NEXT: pmaxud %xmm3, %xmm1
261; SSE41-NEXT: retq
262;
263; SSE42-LABEL: max_gt_v8i32:
264; SSE42: # BB#0:
265; SSE42-NEXT: pmaxud %xmm2, %xmm0
266; SSE42-NEXT: pmaxud %xmm3, %xmm1
267; SSE42-NEXT: retq
268;
269; AVX1-LABEL: max_gt_v8i32:
270; AVX1: # BB#0:
271; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
272; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
273; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
274; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
275; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
276; AVX1-NEXT: retq
277;
278; AVX2-LABEL: max_gt_v8i32:
279; AVX2: # BB#0:
280; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
281; AVX2-NEXT: retq
282;
283; AVX512-LABEL: max_gt_v8i32:
284; AVX512: # BB#0:
285; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
286; AVX512-NEXT: retq
; IR under test: %1 is the per-lane unsigned a > b mask; the select keeps %a
; where the mask is set and %b elsewhere.
287 %1 = icmp ugt <8 x i32> %a, %b
288 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
289 ret <8 x i32> %2
290}
291
; Unsigned max of two <8 x i16> vectors (icmp ugt + select).
; The assertions expect a single pmaxuw (vpmaxuw on AVX targets) everywhere
; except plain SSE2, where both inputs are XORed with 32768 per lane, compared
; with pcmpgtw and merged with pand/pandn/por.
292define <8 x i16> @max_gt_v8i16(<8 x i16> %a, <8 x i16> %b) {
293; SSE2-LABEL: max_gt_v8i16:
294; SSE2: # BB#0:
295; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
296; SSE2-NEXT: movdqa %xmm1, %xmm3
297; SSE2-NEXT: pxor %xmm2, %xmm3
298; SSE2-NEXT: pxor %xmm0, %xmm2
299; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
300; SSE2-NEXT: pand %xmm2, %xmm0
301; SSE2-NEXT: pandn %xmm1, %xmm2
302; SSE2-NEXT: por %xmm2, %xmm0
303; SSE2-NEXT: retq
304;
305; SSE41-LABEL: max_gt_v8i16:
306; SSE41: # BB#0:
307; SSE41-NEXT: pmaxuw %xmm1, %xmm0
308; SSE41-NEXT: retq
309;
310; SSE42-LABEL: max_gt_v8i16:
311; SSE42: # BB#0:
312; SSE42-NEXT: pmaxuw %xmm1, %xmm0
313; SSE42-NEXT: retq
314;
315; AVX-LABEL: max_gt_v8i16:
316; AVX: # BB#0:
317; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
318; AVX-NEXT: retq
; IR under test: %1 is the per-lane unsigned a > b mask; the select keeps %a
; where the mask is set and %b elsewhere.
319 %1 = icmp ugt <8 x i16> %a, %b
320 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
321 ret <8 x i16> %2
322}
323
; Unsigned max of two <16 x i16> vectors (icmp ugt + select).
; Expected lowering per the assertions: plain SSE2 expands each 128-bit half
; to XOR with 32768 + pcmpgtw + pand/pandn/por; SSE4.1 and SSE4.2 use one
; pmaxuw per half; the AVX1 block uses two 128-bit vpmaxuw ops between
; vextractf128 and vinsertf128; the AVX2 and AVX512 blocks use one ymm vpmaxuw.
324define <16 x i16> @max_gt_v16i16(<16 x i16> %a, <16 x i16> %b) {
325; SSE2-LABEL: max_gt_v16i16:
326; SSE2: # BB#0:
327; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [32768,32768,32768,32768,32768,32768,32768,32768]
328; SSE2-NEXT: movdqa %xmm3, %xmm6
329; SSE2-NEXT: pxor %xmm5, %xmm6
330; SSE2-NEXT: movdqa %xmm1, %xmm4
331; SSE2-NEXT: pxor %xmm5, %xmm4
332; SSE2-NEXT: pcmpgtw %xmm6, %xmm4
333; SSE2-NEXT: movdqa %xmm2, %xmm6
334; SSE2-NEXT: pxor %xmm5, %xmm6
335; SSE2-NEXT: pxor %xmm0, %xmm5
336; SSE2-NEXT: pcmpgtw %xmm6, %xmm5
337; SSE2-NEXT: pand %xmm5, %xmm0
338; SSE2-NEXT: pandn %xmm2, %xmm5
339; SSE2-NEXT: por %xmm5, %xmm0
340; SSE2-NEXT: pand %xmm4, %xmm1
341; SSE2-NEXT: pandn %xmm3, %xmm4
342; SSE2-NEXT: por %xmm1, %xmm4
343; SSE2-NEXT: movdqa %xmm4, %xmm1
344; SSE2-NEXT: retq
345;
346; SSE41-LABEL: max_gt_v16i16:
347; SSE41: # BB#0:
348; SSE41-NEXT: pmaxuw %xmm2, %xmm0
349; SSE41-NEXT: pmaxuw %xmm3, %xmm1
350; SSE41-NEXT: retq
351;
352; SSE42-LABEL: max_gt_v16i16:
353; SSE42: # BB#0:
354; SSE42-NEXT: pmaxuw %xmm2, %xmm0
355; SSE42-NEXT: pmaxuw %xmm3, %xmm1
356; SSE42-NEXT: retq
357;
358; AVX1-LABEL: max_gt_v16i16:
359; AVX1: # BB#0:
360; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
361; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
362; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
363; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
364; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
365; AVX1-NEXT: retq
366;
367; AVX2-LABEL: max_gt_v16i16:
368; AVX2: # BB#0:
369; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
370; AVX2-NEXT: retq
371;
372; AVX512-LABEL: max_gt_v16i16:
373; AVX512: # BB#0:
374; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
375; AVX512-NEXT: retq
; IR under test: %1 is the per-lane unsigned a > b mask; the select keeps %a
; where the mask is set and %b elsewhere.
376 %1 = icmp ugt <16 x i16> %a, %b
377 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
378 ret <16 x i16> %2
379}
380
; Unsigned max of two <16 x i8> vectors (icmp ugt + select).
; The assertions expect a single pmaxub on every SSE level tested (they share
; one check prefix here) and a single vpmaxub on every AVX level.
381define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) {
382; SSE-LABEL: max_gt_v16i8:
383; SSE: # BB#0:
384; SSE-NEXT: pmaxub %xmm1, %xmm0
385; SSE-NEXT: retq
386;
387; AVX-LABEL: max_gt_v16i8:
388; AVX: # BB#0:
389; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
390; AVX-NEXT: retq
; IR under test: %1 is the per-lane unsigned a > b mask; the select keeps %a
; where the mask is set and %b elsewhere.
391 %1 = icmp ugt <16 x i8> %a, %b
392 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
393 ret <16 x i8> %2
394}
395
; Unsigned max of two <32 x i8> vectors (icmp ugt + select).
; Expected lowering per the assertions: one pmaxub per 128-bit half on every
; SSE level; two 128-bit vpmaxub ops between vextractf128 and vinsertf128 in
; the AVX1 block; a single ymm vpmaxub in the AVX2 and AVX512 blocks.
396define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) {
397; SSE-LABEL: max_gt_v32i8:
398; SSE: # BB#0:
399; SSE-NEXT: pmaxub %xmm2, %xmm0
400; SSE-NEXT: pmaxub %xmm3, %xmm1
401; SSE-NEXT: retq
402;
403; AVX1-LABEL: max_gt_v32i8:
404; AVX1: # BB#0:
405; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
406; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
407; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
408; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
409; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
410; AVX1-NEXT: retq
411;
412; AVX2-LABEL: max_gt_v32i8:
413; AVX2: # BB#0:
414; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
415; AVX2-NEXT: retq
416;
417; AVX512-LABEL: max_gt_v32i8:
418; AVX512: # BB#0:
419; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
420; AVX512-NEXT: retq
; IR under test: %1 is the per-lane unsigned a > b mask; the select keeps %a
; where the mask is set and %b elsewhere.
421 %1 = icmp ugt <32 x i8> %a, %b
422 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
423 ret <32 x i8> %2
424}
425
426;
427; Unsigned Maximum (GE)
428;
429
; Unsigned max of two <2 x i64> vectors, written as icmp uge + select.
; In the assertions below the a >= b mask is not computed directly: after the
; sign-bit XOR the compare is done as b > a (operands swapped relative to the
; ugt tests), and the result is inverted by XORing with an all-ones register
; produced by pcmpeqd reg,reg before the pandn/por or blendvpd select.
430define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
431; SSE2-LABEL: max_ge_v2i64:
432; SSE2: # BB#0:
433; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
434; SSE2-NEXT: movdqa %xmm0, %xmm3
435; SSE2-NEXT: pxor %xmm2, %xmm3
436; SSE2-NEXT: pxor %xmm1, %xmm2
437; SSE2-NEXT: movdqa %xmm2, %xmm4
438; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
439; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
440; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
441; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
442; SSE2-NEXT: pand %xmm5, %xmm2
443; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
444; SSE2-NEXT: por %xmm2, %xmm3
445; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
446; SSE2-NEXT: pxor %xmm3, %xmm2
447; SSE2-NEXT: pandn %xmm0, %xmm3
448; SSE2-NEXT: pandn %xmm1, %xmm2
449; SSE2-NEXT: por %xmm3, %xmm2
450; SSE2-NEXT: movdqa %xmm2, %xmm0
451; SSE2-NEXT: retq
452;
453; SSE41-LABEL: max_ge_v2i64:
454; SSE41: # BB#0:
455; SSE41-NEXT: movdqa %xmm0, %xmm2
456; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
457; SSE41-NEXT: movdqa %xmm2, %xmm3
458; SSE41-NEXT: pxor %xmm0, %xmm3
459; SSE41-NEXT: pxor %xmm1, %xmm0
460; SSE41-NEXT: movdqa %xmm0, %xmm4
461; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
462; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
463; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
464; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
465; SSE41-NEXT: pand %xmm5, %xmm0
466; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
467; SSE41-NEXT: por %xmm0, %xmm3
468; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
469; SSE41-NEXT: pxor %xmm3, %xmm0
470; SSE41-NEXT: blendvpd %xmm2, %xmm1
471; SSE41-NEXT: movapd %xmm1, %xmm0
472; SSE41-NEXT: retq
473;
474; SSE42-LABEL: max_ge_v2i64:
475; SSE42: # BB#0:
476; SSE42-NEXT: movdqa %xmm0, %xmm2
477; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
478; SSE42-NEXT: pxor %xmm3, %xmm0
479; SSE42-NEXT: pxor %xmm1, %xmm3
480; SSE42-NEXT: pcmpgtq %xmm0, %xmm3
481; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
482; SSE42-NEXT: pxor %xmm3, %xmm0
483; SSE42-NEXT: blendvpd %xmm2, %xmm1
484; SSE42-NEXT: movapd %xmm1, %xmm0
485; SSE42-NEXT: retq
486;
487; AVX-LABEL: max_ge_v2i64:
488; AVX: # BB#0:
489; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
490; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
491; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
492; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
493; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
494; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
495; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
496; AVX-NEXT: retq
; IR under test: %1 is the per-lane unsigned a >= b mask; the select keeps %a
; where the mask is set and %b elsewhere.
497 %1 = icmp uge <2 x i64> %a, %b
498 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
499 ret <2 x i64> %2
500}
501
; Unsigned max of two <4 x i64> vectors (icmp uge + select).
; As in the 128-bit uge test, the assertions show the mask being built as an
; inverted b > a compare (sign-bit XOR, signed compare, then XOR with an
; all-ones register). The SSE check blocks process two 128-bit halves; the
; AVX1 block splits with vextractf128 and rejoins the two inverted masks with
; vinsertf128; the AVX2 and AVX512 blocks operate on whole ymm registers.
502define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
503; SSE2-LABEL: max_ge_v4i64:
504; SSE2: # BB#0:
505; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
506; SSE2-NEXT: movdqa %xmm1, %xmm4
507; SSE2-NEXT: pxor %xmm7, %xmm4
508; SSE2-NEXT: movdqa %xmm3, %xmm5
509; SSE2-NEXT: pxor %xmm7, %xmm5
510; SSE2-NEXT: movdqa %xmm5, %xmm6
511; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
512; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
513; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
514; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
515; SSE2-NEXT: pand %xmm8, %xmm4
516; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
517; SSE2-NEXT: por %xmm4, %xmm8
518; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
519; SSE2-NEXT: movdqa %xmm8, %xmm9
520; SSE2-NEXT: pxor %xmm4, %xmm9
521; SSE2-NEXT: movdqa %xmm0, %xmm6
522; SSE2-NEXT: pxor %xmm7, %xmm6
523; SSE2-NEXT: pxor %xmm2, %xmm7
524; SSE2-NEXT: movdqa %xmm7, %xmm5
525; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
526; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
527; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
528; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
529; SSE2-NEXT: pand %xmm10, %xmm6
530; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
531; SSE2-NEXT: por %xmm6, %xmm5
532; SSE2-NEXT: pxor %xmm5, %xmm4
533; SSE2-NEXT: pandn %xmm0, %xmm5
534; SSE2-NEXT: pandn %xmm2, %xmm4
535; SSE2-NEXT: por %xmm5, %xmm4
536; SSE2-NEXT: pandn %xmm1, %xmm8
537; SSE2-NEXT: pandn %xmm3, %xmm9
538; SSE2-NEXT: por %xmm8, %xmm9
539; SSE2-NEXT: movdqa %xmm4, %xmm0
540; SSE2-NEXT: movdqa %xmm9, %xmm1
541; SSE2-NEXT: retq
542;
543; SSE41-LABEL: max_ge_v4i64:
544; SSE41: # BB#0:
545; SSE41-NEXT: movdqa %xmm0, %xmm8
546; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
547; SSE41-NEXT: movdqa %xmm1, %xmm5
548; SSE41-NEXT: pxor %xmm0, %xmm5
549; SSE41-NEXT: movdqa %xmm3, %xmm6
550; SSE41-NEXT: pxor %xmm0, %xmm6
551; SSE41-NEXT: movdqa %xmm6, %xmm7
552; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
553; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
554; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
555; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
556; SSE41-NEXT: pand %xmm4, %xmm6
557; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
558; SSE41-NEXT: por %xmm6, %xmm5
559; SSE41-NEXT: pcmpeqd %xmm9, %xmm9
560; SSE41-NEXT: pxor %xmm9, %xmm5
561; SSE41-NEXT: movdqa %xmm8, %xmm6
562; SSE41-NEXT: pxor %xmm0, %xmm6
563; SSE41-NEXT: pxor %xmm2, %xmm0
564; SSE41-NEXT: movdqa %xmm0, %xmm7
565; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
566; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
567; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
568; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
569; SSE41-NEXT: pand %xmm4, %xmm6
570; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
571; SSE41-NEXT: por %xmm6, %xmm0
572; SSE41-NEXT: pxor %xmm9, %xmm0
573; SSE41-NEXT: blendvpd %xmm8, %xmm2
574; SSE41-NEXT: movdqa %xmm5, %xmm0
575; SSE41-NEXT: blendvpd %xmm1, %xmm3
576; SSE41-NEXT: movapd %xmm2, %xmm0
577; SSE41-NEXT: movapd %xmm3, %xmm1
578; SSE41-NEXT: retq
579;
580; SSE42-LABEL: max_ge_v4i64:
581; SSE42: # BB#0:
582; SSE42-NEXT: movdqa %xmm0, %xmm4
583; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
584; SSE42-NEXT: movdqa %xmm1, %xmm6
585; SSE42-NEXT: pxor %xmm0, %xmm6
586; SSE42-NEXT: movdqa %xmm3, %xmm5
587; SSE42-NEXT: pxor %xmm0, %xmm5
588; SSE42-NEXT: pcmpgtq %xmm6, %xmm5
589; SSE42-NEXT: pcmpeqd %xmm6, %xmm6
590; SSE42-NEXT: pxor %xmm6, %xmm5
591; SSE42-NEXT: movdqa %xmm4, %xmm7
592; SSE42-NEXT: pxor %xmm0, %xmm7
593; SSE42-NEXT: pxor %xmm2, %xmm0
594; SSE42-NEXT: pcmpgtq %xmm7, %xmm0
595; SSE42-NEXT: pxor %xmm6, %xmm0
596; SSE42-NEXT: blendvpd %xmm4, %xmm2
597; SSE42-NEXT: movdqa %xmm5, %xmm0
598; SSE42-NEXT: blendvpd %xmm1, %xmm3
599; SSE42-NEXT: movapd %xmm2, %xmm0
600; SSE42-NEXT: movapd %xmm3, %xmm1
601; SSE42-NEXT: retq
602;
603; AVX1-LABEL: max_ge_v4i64:
604; AVX1: # BB#0:
605; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
606; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
607; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
608; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
609; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
610; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
611; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
612; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
613; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm5
614; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
615; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
616; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
617; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
618; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
619; AVX1-NEXT: retq
620;
621; AVX2-LABEL: max_ge_v4i64:
622; AVX2: # BB#0:
623; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
624; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
625; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
626; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
627; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
628; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
629; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
630; AVX2-NEXT: retq
631;
632; AVX512-LABEL: max_ge_v4i64:
633; AVX512: # BB#0:
634; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
635; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm3
636; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm2
637; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
638; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
639; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
640; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
641; AVX512-NEXT: retq
; IR under test: %1 is the per-lane unsigned a >= b mask; the select keeps %a
; where the mask is set and %b elsewhere.
642 %1 = icmp uge <4 x i64> %a, %b
643 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
644 ret <4 x i64> %2
645}
646
; Unsigned max of two <4 x i32> vectors (icmp uge + select).
; The assertions expect a single pmaxud (vpmaxud on AVX targets) everywhere
; except plain SSE2, where the mask is built as an inverted b > a compare
; (sign-bit XOR, pcmpgtd, XOR with all-ones) followed by a pandn/pandn/por
; merge.
647define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) {
648; SSE2-LABEL: max_ge_v4i32:
649; SSE2: # BB#0:
650; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
651; SSE2-NEXT: movdqa %xmm0, %xmm2
652; SSE2-NEXT: pxor %xmm3, %xmm2
653; SSE2-NEXT: pxor %xmm1, %xmm3
654; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
655; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
656; SSE2-NEXT: pxor %xmm3, %xmm2
657; SSE2-NEXT: pandn %xmm0, %xmm3
658; SSE2-NEXT: pandn %xmm1, %xmm2
659; SSE2-NEXT: por %xmm3, %xmm2
660; SSE2-NEXT: movdqa %xmm2, %xmm0
661; SSE2-NEXT: retq
662;
663; SSE41-LABEL: max_ge_v4i32:
664; SSE41: # BB#0:
665; SSE41-NEXT: pmaxud %xmm1, %xmm0
666; SSE41-NEXT: retq
667;
668; SSE42-LABEL: max_ge_v4i32:
669; SSE42: # BB#0:
670; SSE42-NEXT: pmaxud %xmm1, %xmm0
671; SSE42-NEXT: retq
672;
673; AVX-LABEL: max_ge_v4i32:
674; AVX: # BB#0:
675; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
676; AVX-NEXT: retq
; IR under test: %1 is the per-lane unsigned a >= b mask; the select keeps %a
; where the mask is set and %b elsewhere.
677 %1 = icmp uge <4 x i32> %a, %b
678 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
679 ret <4 x i32> %2
680}
681
; Unsigned max of two <8 x i32> vectors (icmp uge + select).
; Expected lowering per the assertions: plain SSE2 builds an inverted b > a
; mask for each 128-bit half (sign-bit XOR, pcmpgtd, XOR with all-ones) and
; merges with pandn/pandn/por; SSE4.1 and SSE4.2 use one pmaxud per half; the
; AVX1 block uses two 128-bit vpmaxud ops between vextractf128 and
; vinsertf128; the AVX2 and AVX512 blocks use a single ymm vpmaxud.
682define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) {
683; SSE2-LABEL: max_ge_v8i32:
684; SSE2: # BB#0:
685; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
686; SSE2-NEXT: movdqa %xmm1, %xmm4
687; SSE2-NEXT: pxor %xmm6, %xmm4
688; SSE2-NEXT: movdqa %xmm3, %xmm7
689; SSE2-NEXT: pxor %xmm6, %xmm7
690; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
691; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
692; SSE2-NEXT: movdqa %xmm7, %xmm5
693; SSE2-NEXT: pxor %xmm4, %xmm5
694; SSE2-NEXT: movdqa %xmm0, %xmm8
695; SSE2-NEXT: pxor %xmm6, %xmm8
696; SSE2-NEXT: pxor %xmm2, %xmm6
697; SSE2-NEXT: pcmpgtd %xmm8, %xmm6
698; SSE2-NEXT: pxor %xmm6, %xmm4
699; SSE2-NEXT: pandn %xmm0, %xmm6
700; SSE2-NEXT: pandn %xmm2, %xmm4
701; SSE2-NEXT: por %xmm6, %xmm4
702; SSE2-NEXT: pandn %xmm1, %xmm7
703; SSE2-NEXT: pandn %xmm3, %xmm5
704; SSE2-NEXT: por %xmm7, %xmm5
705; SSE2-NEXT: movdqa %xmm4, %xmm0
706; SSE2-NEXT: movdqa %xmm5, %xmm1
707; SSE2-NEXT: retq
708;
709; SSE41-LABEL: max_ge_v8i32:
710; SSE41: # BB#0:
711; SSE41-NEXT: pmaxud %xmm2, %xmm0
712; SSE41-NEXT: pmaxud %xmm3, %xmm1
713; SSE41-NEXT: retq
714;
715; SSE42-LABEL: max_ge_v8i32:
716; SSE42: # BB#0:
717; SSE42-NEXT: pmaxud %xmm2, %xmm0
718; SSE42-NEXT: pmaxud %xmm3, %xmm1
719; SSE42-NEXT: retq
720;
721; AVX1-LABEL: max_ge_v8i32:
722; AVX1: # BB#0:
723; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
724; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
725; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
726; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
727; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
728; AVX1-NEXT: retq
729;
730; AVX2-LABEL: max_ge_v8i32:
731; AVX2: # BB#0:
732; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
733; AVX2-NEXT: retq
734;
735; AVX512-LABEL: max_ge_v8i32:
736; AVX512: # BB#0:
737; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
738; AVX512-NEXT: retq
; IR under test: %1 is the per-lane unsigned a >= b mask; the select keeps %a
; where the mask is set and %b elsewhere.
739 %1 = icmp uge <8 x i32> %a, %b
740 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
741 ret <8 x i32> %2
742}
743
; Unsigned max of two <8 x i16> vectors (icmp uge + select).
; The assertions expect a single pmaxuw (vpmaxuw on AVX targets) everywhere
; except plain SSE2. There the mask comes from an unsigned saturating subtract:
; psubusw computes b - a with saturation, pcmpeqw against a zeroed register
; marks the lanes where that result is zero, and pand/pandn/por merge %a and
; %b under that mask.
744define <8 x i16> @max_ge_v8i16(<8 x i16> %a, <8 x i16> %b) {
745; SSE2-LABEL: max_ge_v8i16:
746; SSE2: # BB#0:
747; SSE2-NEXT: movdqa %xmm1, %xmm2
748; SSE2-NEXT: psubusw %xmm0, %xmm2
749; SSE2-NEXT: pxor %xmm3, %xmm3
750; SSE2-NEXT: pcmpeqw %xmm2, %xmm3
751; SSE2-NEXT: pand %xmm3, %xmm0
752; SSE2-NEXT: pandn %xmm1, %xmm3
753; SSE2-NEXT: por %xmm3, %xmm0
754; SSE2-NEXT: retq
755;
756; SSE41-LABEL: max_ge_v8i16:
757; SSE41: # BB#0:
758; SSE41-NEXT: pmaxuw %xmm1, %xmm0
759; SSE41-NEXT: retq
760;
761; SSE42-LABEL: max_ge_v8i16:
762; SSE42: # BB#0:
763; SSE42-NEXT: pmaxuw %xmm1, %xmm0
764; SSE42-NEXT: retq
765;
766; AVX-LABEL: max_ge_v8i16:
767; AVX: # BB#0:
768; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
769; AVX-NEXT: retq
; IR under test: %1 is the per-lane unsigned a >= b mask; the select keeps %a
; where the mask is set and %b elsewhere.
770 %1 = icmp uge <8 x i16> %a, %b
771 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
772 ret <8 x i16> %2
773}
774
; Unsigned max of two <16 x i16> vectors (icmp uge + select).
; Expected lowering per the assertions: plain SSE2 applies the psubusw +
; pcmpeqw-against-zero mask and a pand/pandn/por merge to each 128-bit half,
; sharing one zeroed register; SSE4.1 and SSE4.2 use one pmaxuw per half; the
; AVX1 block uses two 128-bit vpmaxuw ops between vextractf128 and
; vinsertf128; the AVX2 and AVX512 blocks use a single ymm vpmaxuw.
775define <16 x i16> @max_ge_v16i16(<16 x i16> %a, <16 x i16> %b) {
776; SSE2-LABEL: max_ge_v16i16:
777; SSE2: # BB#0:
778; SSE2-NEXT: movdqa %xmm3, %xmm4
779; SSE2-NEXT: psubusw %xmm1, %xmm4
780; SSE2-NEXT: pxor %xmm5, %xmm5
781; SSE2-NEXT: pcmpeqw %xmm5, %xmm4
782; SSE2-NEXT: movdqa %xmm2, %xmm6
783; SSE2-NEXT: psubusw %xmm0, %xmm6
784; SSE2-NEXT: pcmpeqw %xmm5, %xmm6
785; SSE2-NEXT: pand %xmm6, %xmm0
786; SSE2-NEXT: pandn %xmm2, %xmm6
787; SSE2-NEXT: por %xmm6, %xmm0
788; SSE2-NEXT: pand %xmm4, %xmm1
789; SSE2-NEXT: pandn %xmm3, %xmm4
790; SSE2-NEXT: por %xmm4, %xmm1
791; SSE2-NEXT: retq
792;
793; SSE41-LABEL: max_ge_v16i16:
794; SSE41: # BB#0:
795; SSE41-NEXT: pmaxuw %xmm2, %xmm0
796; SSE41-NEXT: pmaxuw %xmm3, %xmm1
797; SSE41-NEXT: retq
798;
799; SSE42-LABEL: max_ge_v16i16:
800; SSE42: # BB#0:
801; SSE42-NEXT: pmaxuw %xmm2, %xmm0
802; SSE42-NEXT: pmaxuw %xmm3, %xmm1
803; SSE42-NEXT: retq
804;
805; AVX1-LABEL: max_ge_v16i16:
806; AVX1: # BB#0:
807; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
808; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
809; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
810; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
811; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
812; AVX1-NEXT: retq
813;
814; AVX2-LABEL: max_ge_v16i16:
815; AVX2: # BB#0:
816; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
817; AVX2-NEXT: retq
818;
819; AVX512-LABEL: max_ge_v16i16:
820; AVX512: # BB#0:
821; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
822; AVX512-NEXT: retq
; IR under test: %1 is the per-lane unsigned a >= b mask; the select keeps %a
; where the mask is set and %b elsewhere.
823 %1 = icmp uge <16 x i16> %a, %b
824 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
825 ret <16 x i16> %2
826}
827
; Unsigned max of two <16 x i8> vectors (icmp uge + select).
; The assertions expect a single pmaxub on every SSE level tested (they share
; one check prefix here) and a single vpmaxub on every AVX level.
828define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) {
829; SSE-LABEL: max_ge_v16i8:
830; SSE: # BB#0:
831; SSE-NEXT: pmaxub %xmm1, %xmm0
832; SSE-NEXT: retq
833;
834; AVX-LABEL: max_ge_v16i8:
835; AVX: # BB#0:
836; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
837; AVX-NEXT: retq
; IR under test: %1 is the per-lane unsigned a >= b mask; the select keeps %a
; where the mask is set and %b elsewhere.
838 %1 = icmp uge <16 x i8> %a, %b
839 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
840 ret <16 x i8> %2
841}
842
; Unsigned max of two <32 x i8> vectors (icmp uge + select).
; Expected lowering per the assertions: one pmaxub per 128-bit half on every
; SSE level; two 128-bit vpmaxub ops between vextractf128 and vinsertf128 in
; the AVX1 block; a single ymm vpmaxub in the AVX2 and AVX512 blocks.
843define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) {
844; SSE-LABEL: max_ge_v32i8:
845; SSE: # BB#0:
846; SSE-NEXT: pmaxub %xmm2, %xmm0
847; SSE-NEXT: pmaxub %xmm3, %xmm1
848; SSE-NEXT: retq
849;
850; AVX1-LABEL: max_ge_v32i8:
851; AVX1: # BB#0:
852; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
853; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
854; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
855; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
856; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
857; AVX1-NEXT: retq
858;
859; AVX2-LABEL: max_ge_v32i8:
860; AVX2: # BB#0:
861; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
862; AVX2-NEXT: retq
863;
864; AVX512-LABEL: max_ge_v32i8:
865; AVX512: # BB#0:
866; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
867; AVX512-NEXT: retq
; IR under test: %1 is the per-lane unsigned a >= b mask; the select keeps %a
; where the mask is set and %b elsewhere.
868 %1 = icmp uge <32 x i8> %a, %b
869 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
870 ret <32 x i8> %2
871}
872
873;
874; Unsigned Minimum (LT)
875;
876
; Unsigned min of two <2 x i64> vectors, written as icmp ult + select.
; The assertions below expect no dedicated unsigned-min instruction for
; 64-bit lanes on any tested target. Both inputs are XORed with a sign-bit
; constant and the mask is computed as a signed b > a compare (a
; pcmpgtd/pcmpeqd/pshufd sequence on plain SSE2 and SSE4.1, pcmpgtq from
; SSE4.2 onwards); that mask drives a pand/pandn/por or blendvpd select.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +0000877define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
878; SSE2-LABEL: min_lt_v2i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +0000879; SSE2: # BB#0:
880; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
881; SSE2-NEXT: movdqa %xmm0, %xmm3
882; SSE2-NEXT: pxor %xmm2, %xmm3
883; SSE2-NEXT: pxor %xmm1, %xmm2
884; SSE2-NEXT: movdqa %xmm2, %xmm4
885; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
886; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
887; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
888; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
889; SSE2-NEXT: pand %xmm5, %xmm2
890; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
891; SSE2-NEXT: por %xmm2, %xmm3
892; SSE2-NEXT: pand %xmm3, %xmm0
893; SSE2-NEXT: pandn %xmm1, %xmm3
894; SSE2-NEXT: por %xmm3, %xmm0
895; SSE2-NEXT: retq
896;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +0000897; SSE41-LABEL: min_lt_v2i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +0000898; SSE41: # BB#0:
899; SSE41-NEXT: movdqa %xmm0, %xmm2
900; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
901; SSE41-NEXT: movdqa %xmm2, %xmm3
902; SSE41-NEXT: pxor %xmm0, %xmm3
903; SSE41-NEXT: pxor %xmm1, %xmm0
904; SSE41-NEXT: movdqa %xmm0, %xmm4
905; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
906; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
907; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
908; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
909; SSE41-NEXT: pand %xmm5, %xmm3
910; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
911; SSE41-NEXT: por %xmm3, %xmm0
912; SSE41-NEXT: blendvpd %xmm2, %xmm1
913; SSE41-NEXT: movapd %xmm1, %xmm0
914; SSE41-NEXT: retq
915;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +0000916; SSE42-LABEL: min_lt_v2i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +0000917; SSE42: # BB#0:
918; SSE42-NEXT: movdqa %xmm0, %xmm2
919; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
920; SSE42-NEXT: movdqa %xmm2, %xmm3
921; SSE42-NEXT: pxor %xmm0, %xmm3
922; SSE42-NEXT: pxor %xmm1, %xmm0
923; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
924; SSE42-NEXT: blendvpd %xmm2, %xmm1
925; SSE42-NEXT: movapd %xmm1, %xmm0
926; SSE42-NEXT: retq
927;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +0000928; AVX-LABEL: min_lt_v2i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +0000929; AVX: # BB#0:
930; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
931; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
932; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
933; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
934; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
935; AVX-NEXT: retq
; IR under test: %1 is the per-lane unsigned a < b mask; the select keeps %a
; where the mask is set and %b elsewhere.
936 %1 = icmp ult <2 x i64> %a, %b
937 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
938 ret <2 x i64> %2
939}
940
; min_lt_v4i64 - lane-wise unsigned minimum of two <4 x i64> vectors, written
; as icmp ult + select (a lane takes %a when %a <u %b, otherwise %b).
; No run below expects a dedicated 64-bit unsigned-min instruction. Every
; expectation XORs both inputs with a constant, does a signed greater-than
; compare and then blends. The SSE2 and SSE4.1 runs build the 64-bit compare
; out of 32-bit pcmpgtd/pcmpeqd pieces; the SSE4.2 and AVX runs use pcmpgtq.
; The SSE runs handle the 256-bit value as two xmm halves.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +0000941define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
942; SSE2-LABEL: min_lt_v4i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +0000943; SSE2: # BB#0:
944; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
945; SSE2-NEXT: movdqa %xmm1, %xmm5
946; SSE2-NEXT: pxor %xmm4, %xmm5
947; SSE2-NEXT: movdqa %xmm3, %xmm6
948; SSE2-NEXT: pxor %xmm4, %xmm6
949; SSE2-NEXT: movdqa %xmm6, %xmm7
950; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
951; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
952; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
953; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
954; SSE2-NEXT: pand %xmm8, %xmm5
955; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
956; SSE2-NEXT: por %xmm5, %xmm6
957; SSE2-NEXT: movdqa %xmm0, %xmm5
958; SSE2-NEXT: pxor %xmm4, %xmm5
959; SSE2-NEXT: pxor %xmm2, %xmm4
960; SSE2-NEXT: movdqa %xmm4, %xmm7
961; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
962; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
963; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
964; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
965; SSE2-NEXT: pand %xmm8, %xmm4
966; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
967; SSE2-NEXT: por %xmm4, %xmm5
968; SSE2-NEXT: pand %xmm5, %xmm0
969; SSE2-NEXT: pandn %xmm2, %xmm5
970; SSE2-NEXT: por %xmm5, %xmm0
971; SSE2-NEXT: pand %xmm6, %xmm1
972; SSE2-NEXT: pandn %xmm3, %xmm6
973; SSE2-NEXT: por %xmm6, %xmm1
974; SSE2-NEXT: retq
975;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +0000976; SSE41-LABEL: min_lt_v4i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +0000977; SSE41: # BB#0:
978; SSE41-NEXT: movdqa %xmm0, %xmm8
979; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
980; SSE41-NEXT: movdqa %xmm1, %xmm5
981; SSE41-NEXT: pxor %xmm0, %xmm5
982; SSE41-NEXT: movdqa %xmm3, %xmm6
983; SSE41-NEXT: pxor %xmm0, %xmm6
984; SSE41-NEXT: movdqa %xmm6, %xmm7
985; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
986; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
987; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
988; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
989; SSE41-NEXT: pand %xmm4, %xmm6
990; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
991; SSE41-NEXT: por %xmm6, %xmm5
992; SSE41-NEXT: movdqa %xmm8, %xmm4
993; SSE41-NEXT: pxor %xmm0, %xmm4
994; SSE41-NEXT: pxor %xmm2, %xmm0
995; SSE41-NEXT: movdqa %xmm0, %xmm6
996; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
997; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
998; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
999; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1000; SSE41-NEXT: pand %xmm7, %xmm4
1001; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
1002; SSE41-NEXT: por %xmm4, %xmm0
1003; SSE41-NEXT: blendvpd %xmm8, %xmm2
1004; SSE41-NEXT: movdqa %xmm5, %xmm0
1005; SSE41-NEXT: blendvpd %xmm1, %xmm3
1006; SSE41-NEXT: movapd %xmm2, %xmm0
1007; SSE41-NEXT: movapd %xmm3, %xmm1
1008; SSE41-NEXT: retq
1009;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001010; SSE42-LABEL: min_lt_v4i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001011; SSE42: # BB#0:
1012; SSE42-NEXT: movdqa %xmm0, %xmm4
1013; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
1014; SSE42-NEXT: movdqa %xmm1, %xmm6
1015; SSE42-NEXT: pxor %xmm0, %xmm6
1016; SSE42-NEXT: movdqa %xmm3, %xmm5
1017; SSE42-NEXT: pxor %xmm0, %xmm5
1018; SSE42-NEXT: pcmpgtq %xmm6, %xmm5
1019; SSE42-NEXT: movdqa %xmm4, %xmm6
1020; SSE42-NEXT: pxor %xmm0, %xmm6
1021; SSE42-NEXT: pxor %xmm2, %xmm0
1022; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
1023; SSE42-NEXT: blendvpd %xmm4, %xmm2
1024; SSE42-NEXT: movdqa %xmm5, %xmm0
1025; SSE42-NEXT: blendvpd %xmm1, %xmm3
1026; SSE42-NEXT: movapd %xmm2, %xmm0
1027; SSE42-NEXT: movapd %xmm3, %xmm1
1028; SSE42-NEXT: retq
1029;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001030; AVX1-LABEL: min_lt_v4i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001031; AVX1: # BB#0:
1032; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1033; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
1034; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
1035; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
1036; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
1037; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
1038; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm4
1039; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
1040; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
1041; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1042; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1043; AVX1-NEXT: retq
1044;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001045; AVX2-LABEL: min_lt_v4i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001046; AVX2: # BB#0:
1047; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
1048; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
1049; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
1050; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
1051; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1052; AVX2-NEXT: retq
1053;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001054; AVX512-LABEL: min_lt_v4i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001055; AVX512: # BB#0:
1056; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
1057; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm3
1058; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm2
1059; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
1060; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1061; AVX512-NEXT: retq
; IR under test: per-lane mask of %a <u %b, then pick %a where set, else %b.
1062 %1 = icmp ult <4 x i64> %a, %b
1063 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
1064 ret <4 x i64> %2
1065}
1066
; min_lt_v4i32 - lane-wise unsigned minimum of two <4 x i32> vectors, written
; as icmp ult + select.
; The SSE2 run expects an emulated sequence: XOR both inputs with 2147483648
; (the top bit of each 32-bit lane), signed pcmpgtd, then pand/pandn/por.
; The SSE4.1, SSE4.2 and AVX runs expect a single pminud / vpminud.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001067define <4 x i32> @min_lt_v4i32(<4 x i32> %a, <4 x i32> %b) {
1068; SSE2-LABEL: min_lt_v4i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001069; SSE2: # BB#0:
1070; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1071; SSE2-NEXT: movdqa %xmm0, %xmm3
1072; SSE2-NEXT: pxor %xmm2, %xmm3
1073; SSE2-NEXT: pxor %xmm1, %xmm2
1074; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
1075; SSE2-NEXT: pand %xmm2, %xmm0
1076; SSE2-NEXT: pandn %xmm1, %xmm2
1077; SSE2-NEXT: por %xmm2, %xmm0
1078; SSE2-NEXT: retq
1079;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001080; SSE41-LABEL: min_lt_v4i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001081; SSE41: # BB#0:
1082; SSE41-NEXT: pminud %xmm1, %xmm0
1083; SSE41-NEXT: retq
1084;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001085; SSE42-LABEL: min_lt_v4i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001086; SSE42: # BB#0:
1087; SSE42-NEXT: pminud %xmm1, %xmm0
1088; SSE42-NEXT: retq
1089;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001090; AVX-LABEL: min_lt_v4i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001091; AVX: # BB#0:
1092; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
1093; AVX-NEXT: retq
; IR under test: per-lane mask of %a <u %b, then pick %a where set, else %b.
1094 %1 = icmp ult <4 x i32> %a, %b
1095 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
1096 ret <4 x i32> %2
1097}
1098
; min_lt_v8i32 - lane-wise unsigned minimum of two <8 x i32> vectors, written
; as icmp ult + select.
; The SSE runs work on two xmm halves (xmm0/xmm2 and xmm1/xmm3). SSE2 expects
; the XOR-with-2147483648 + pcmpgtd + pand/pandn/por emulation per half, while
; SSE4.1 and SSE4.2 expect one pminud per half.
; The AVX1 run expects the upper halves to be extracted, two xmm vpminud and a
; vinsertf128; the AVX2 and AVX512 runs expect a single ymm vpminud.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001099define <8 x i32> @min_lt_v8i32(<8 x i32> %a, <8 x i32> %b) {
1100; SSE2-LABEL: min_lt_v8i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001101; SSE2: # BB#0:
1102; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1103; SSE2-NEXT: movdqa %xmm1, %xmm5
1104; SSE2-NEXT: pxor %xmm4, %xmm5
1105; SSE2-NEXT: movdqa %xmm3, %xmm6
1106; SSE2-NEXT: pxor %xmm4, %xmm6
1107; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
1108; SSE2-NEXT: movdqa %xmm0, %xmm5
1109; SSE2-NEXT: pxor %xmm4, %xmm5
1110; SSE2-NEXT: pxor %xmm2, %xmm4
1111; SSE2-NEXT: pcmpgtd %xmm5, %xmm4
1112; SSE2-NEXT: pand %xmm4, %xmm0
1113; SSE2-NEXT: pandn %xmm2, %xmm4
1114; SSE2-NEXT: por %xmm4, %xmm0
1115; SSE2-NEXT: pand %xmm6, %xmm1
1116; SSE2-NEXT: pandn %xmm3, %xmm6
1117; SSE2-NEXT: por %xmm6, %xmm1
1118; SSE2-NEXT: retq
1119;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001120; SSE41-LABEL: min_lt_v8i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001121; SSE41: # BB#0:
1122; SSE41-NEXT: pminud %xmm2, %xmm0
1123; SSE41-NEXT: pminud %xmm3, %xmm1
1124; SSE41-NEXT: retq
1125;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001126; SSE42-LABEL: min_lt_v8i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001127; SSE42: # BB#0:
1128; SSE42-NEXT: pminud %xmm2, %xmm0
1129; SSE42-NEXT: pminud %xmm3, %xmm1
1130; SSE42-NEXT: retq
1131;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001132; AVX1-LABEL: min_lt_v8i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001133; AVX1: # BB#0:
1134; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1135; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1136; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1137; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
1138; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1139; AVX1-NEXT: retq
1140;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001141; AVX2-LABEL: min_lt_v8i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001142; AVX2: # BB#0:
1143; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
1144; AVX2-NEXT: retq
1145;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001146; AVX512-LABEL: min_lt_v8i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001147; AVX512: # BB#0:
1148; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
1149; AVX512-NEXT: retq
; IR under test: per-lane mask of %a <u %b, then pick %a where set, else %b.
1150 %1 = icmp ult <8 x i32> %a, %b
1151 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
1152 ret <8 x i32> %2
1153}
1154
; min_lt_v8i16 - lane-wise unsigned minimum of two <8 x i16> vectors, written
; as icmp ult + select.
; The SSE2 run expects an emulated sequence: XOR both inputs with 32768 (the
; top bit of each 16-bit lane), signed pcmpgtw, then pand/pandn/por.
; The SSE4.1, SSE4.2 and AVX runs expect a single pminuw / vpminuw.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001155define <8 x i16> @min_lt_v8i16(<8 x i16> %a, <8 x i16> %b) {
1156; SSE2-LABEL: min_lt_v8i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001157; SSE2: # BB#0:
1158; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
1159; SSE2-NEXT: movdqa %xmm0, %xmm3
1160; SSE2-NEXT: pxor %xmm2, %xmm3
1161; SSE2-NEXT: pxor %xmm1, %xmm2
1162; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
1163; SSE2-NEXT: pand %xmm2, %xmm0
1164; SSE2-NEXT: pandn %xmm1, %xmm2
1165; SSE2-NEXT: por %xmm2, %xmm0
1166; SSE2-NEXT: retq
1167;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001168; SSE41-LABEL: min_lt_v8i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001169; SSE41: # BB#0:
1170; SSE41-NEXT: pminuw %xmm1, %xmm0
1171; SSE41-NEXT: retq
1172;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001173; SSE42-LABEL: min_lt_v8i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001174; SSE42: # BB#0:
1175; SSE42-NEXT: pminuw %xmm1, %xmm0
1176; SSE42-NEXT: retq
1177;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001178; AVX-LABEL: min_lt_v8i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001179; AVX: # BB#0:
1180; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1181; AVX-NEXT: retq
; IR under test: per-lane mask of %a <u %b, then pick %a where set, else %b.
1182 %1 = icmp ult <8 x i16> %a, %b
1183 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
1184 ret <8 x i16> %2
1185}
1186
; min_lt_v16i16 - lane-wise unsigned minimum of two <16 x i16> vectors,
; written as icmp ult + select.
; The SSE runs work on two xmm halves (xmm0/xmm2 and xmm1/xmm3). SSE2 expects
; the XOR-with-32768 + pcmpgtw + pand/pandn/por emulation per half, while
; SSE4.1 and SSE4.2 expect one pminuw per half.
; The AVX1 run expects the upper halves to be extracted, two xmm vpminuw and a
; vinsertf128; the AVX2 and AVX512 runs expect a single ymm vpminuw.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001187define <16 x i16> @min_lt_v16i16(<16 x i16> %a, <16 x i16> %b) {
1188; SSE2-LABEL: min_lt_v16i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001189; SSE2: # BB#0:
1190; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
1191; SSE2-NEXT: movdqa %xmm1, %xmm5
1192; SSE2-NEXT: pxor %xmm4, %xmm5
1193; SSE2-NEXT: movdqa %xmm3, %xmm6
1194; SSE2-NEXT: pxor %xmm4, %xmm6
1195; SSE2-NEXT: pcmpgtw %xmm5, %xmm6
1196; SSE2-NEXT: movdqa %xmm0, %xmm5
1197; SSE2-NEXT: pxor %xmm4, %xmm5
1198; SSE2-NEXT: pxor %xmm2, %xmm4
1199; SSE2-NEXT: pcmpgtw %xmm5, %xmm4
1200; SSE2-NEXT: pand %xmm4, %xmm0
1201; SSE2-NEXT: pandn %xmm2, %xmm4
1202; SSE2-NEXT: por %xmm4, %xmm0
1203; SSE2-NEXT: pand %xmm6, %xmm1
1204; SSE2-NEXT: pandn %xmm3, %xmm6
1205; SSE2-NEXT: por %xmm6, %xmm1
1206; SSE2-NEXT: retq
1207;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001208; SSE41-LABEL: min_lt_v16i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001209; SSE41: # BB#0:
1210; SSE41-NEXT: pminuw %xmm2, %xmm0
1211; SSE41-NEXT: pminuw %xmm3, %xmm1
1212; SSE41-NEXT: retq
1213;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001214; SSE42-LABEL: min_lt_v16i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001215; SSE42: # BB#0:
1216; SSE42-NEXT: pminuw %xmm2, %xmm0
1217; SSE42-NEXT: pminuw %xmm3, %xmm1
1218; SSE42-NEXT: retq
1219;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001220; AVX1-LABEL: min_lt_v16i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001221; AVX1: # BB#0:
1222; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1223; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1224; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
1225; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1226; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1227; AVX1-NEXT: retq
1228;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001229; AVX2-LABEL: min_lt_v16i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001230; AVX2: # BB#0:
1231; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1232; AVX2-NEXT: retq
1233;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001234; AVX512-LABEL: min_lt_v16i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001235; AVX512: # BB#0:
1236; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1237; AVX512-NEXT: retq
; IR under test: per-lane mask of %a <u %b, then pick %a where set, else %b.
1238 %1 = icmp ult <16 x i16> %a, %b
1239 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
1240 ret <16 x i16> %2
1241}
1242
; min_lt_v16i8 - lane-wise unsigned minimum of two <16 x i8> vectors, written
; as icmp ult + select.
; Every SSE run shares one expectation (a single pminub) and every AVX run
; shares another (a single vpminub), so only the two common prefixes are used.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001243define <16 x i8> @min_lt_v16i8(<16 x i8> %a, <16 x i8> %b) {
1244; SSE-LABEL: min_lt_v16i8:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001245; SSE: # BB#0:
1246; SSE-NEXT: pminub %xmm1, %xmm0
1247; SSE-NEXT: retq
1248;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001249; AVX-LABEL: min_lt_v16i8:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001250; AVX: # BB#0:
1251; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
1252; AVX-NEXT: retq
; IR under test: per-lane mask of %a <u %b, then pick %a where set, else %b.
1253 %1 = icmp ult <16 x i8> %a, %b
1254 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
1255 ret <16 x i8> %2
1256}
1257
; min_lt_v32i8 - lane-wise unsigned minimum of two <32 x i8> vectors, written
; as icmp ult + select.
; Every SSE run expects one pminub per xmm half (xmm0/xmm2 and xmm1/xmm3).
; The AVX1 run expects the upper halves to be extracted, two xmm vpminub and a
; vinsertf128; the AVX2 and AVX512 runs expect a single ymm vpminub.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001258define <32 x i8> @min_lt_v32i8(<32 x i8> %a, <32 x i8> %b) {
1259; SSE-LABEL: min_lt_v32i8:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001260; SSE: # BB#0:
1261; SSE-NEXT: pminub %xmm2, %xmm0
1262; SSE-NEXT: pminub %xmm3, %xmm1
1263; SSE-NEXT: retq
1264;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001265; AVX1-LABEL: min_lt_v32i8:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001266; AVX1: # BB#0:
1267; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1268; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1269; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
1270; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
1271; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1272; AVX1-NEXT: retq
1273;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001274; AVX2-LABEL: min_lt_v32i8:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001275; AVX2: # BB#0:
1276; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
1277; AVX2-NEXT: retq
1278;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001279; AVX512-LABEL: min_lt_v32i8:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001280; AVX512: # BB#0:
1281; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
1282; AVX512-NEXT: retq
; IR under test: per-lane mask of %a <u %b, then pick %a where set, else %b.
1283 %1 = icmp ult <32 x i8> %a, %b
1284 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
1285 ret <32 x i8> %2
1286}
1287
1288;
1289; Unsigned Minimum (LE)
1290;
1291
; min_le_v2i64 - lane-wise unsigned minimum of two <2 x i64> vectors, written
; as icmp ule + select (a lane takes %a when %a <=u %b, otherwise %b).
; Every expectation first builds a "%a >u %b" mask (XOR both inputs with a
; sign-bit constant, then a signed greater-than compare) and then inverts it
; by XOR-ing with an all-ones register produced by pcmpeqd reg,reg.
; The SSE2 and SSE4.1 runs assemble the 64-bit compare from 32-bit
; pcmpgtd/pcmpeqd pieces; the SSE4.2 and AVX runs use pcmpgtq / vpcmpgtq.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001292define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
1293; SSE2-LABEL: min_le_v2i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001294; SSE2: # BB#0:
1295; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1296; SSE2-NEXT: movdqa %xmm1, %xmm3
1297; SSE2-NEXT: pxor %xmm2, %xmm3
1298; SSE2-NEXT: pxor %xmm0, %xmm2
1299; SSE2-NEXT: movdqa %xmm2, %xmm4
1300; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1301; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1302; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
1303; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1304; SSE2-NEXT: pand %xmm5, %xmm2
1305; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1306; SSE2-NEXT: por %xmm2, %xmm3
1307; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
1308; SSE2-NEXT: pxor %xmm3, %xmm2
1309; SSE2-NEXT: pandn %xmm0, %xmm3
1310; SSE2-NEXT: pandn %xmm1, %xmm2
1311; SSE2-NEXT: por %xmm3, %xmm2
1312; SSE2-NEXT: movdqa %xmm2, %xmm0
1313; SSE2-NEXT: retq
1314;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001315; SSE41-LABEL: min_le_v2i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001316; SSE41: # BB#0:
1317; SSE41-NEXT: movdqa %xmm0, %xmm2
1318; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
1319; SSE41-NEXT: movdqa %xmm1, %xmm3
1320; SSE41-NEXT: pxor %xmm0, %xmm3
1321; SSE41-NEXT: pxor %xmm2, %xmm0
1322; SSE41-NEXT: movdqa %xmm0, %xmm4
1323; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
1324; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1325; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
1326; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1327; SSE41-NEXT: pand %xmm5, %xmm0
1328; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1329; SSE41-NEXT: por %xmm0, %xmm3
1330; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
1331; SSE41-NEXT: pxor %xmm3, %xmm0
1332; SSE41-NEXT: blendvpd %xmm2, %xmm1
1333; SSE41-NEXT: movapd %xmm1, %xmm0
1334; SSE41-NEXT: retq
1335;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001336; SSE42-LABEL: min_le_v2i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001337; SSE42: # BB#0:
1338; SSE42-NEXT: movdqa %xmm0, %xmm2
1339; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
1340; SSE42-NEXT: movdqa %xmm1, %xmm0
1341; SSE42-NEXT: pxor %xmm3, %xmm0
1342; SSE42-NEXT: pxor %xmm2, %xmm3
1343; SSE42-NEXT: pcmpgtq %xmm0, %xmm3
1344; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
1345; SSE42-NEXT: pxor %xmm3, %xmm0
1346; SSE42-NEXT: blendvpd %xmm2, %xmm1
1347; SSE42-NEXT: movapd %xmm1, %xmm0
1348; SSE42-NEXT: retq
1349;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001350; AVX-LABEL: min_le_v2i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001351; AVX: # BB#0:
1352; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1353; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3
1354; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
1355; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
1356; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
1357; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
1358; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1359; AVX-NEXT: retq
; IR under test: per-lane mask of %a <=u %b, then pick %a where set, else %b.
1360 %1 = icmp ule <2 x i64> %a, %b
1361 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
1362 ret <2 x i64> %2
1363}
1364
; min_le_v4i64 - lane-wise unsigned minimum of two <4 x i64> vectors, written
; as icmp ule + select (a lane takes %a when %a <=u %b, otherwise %b).
; Every expectation builds a "%a >u %b" mask (XOR both inputs with a constant,
; then a signed greater-than compare) and inverts it by XOR-ing with an
; all-ones register produced by pcmpeqd reg,reg before blending.
; The SSE runs handle the value as two xmm halves; SSE2 additionally needs
; registers xmm8-xmm10 for the emulated 64-bit compare. The AVX1 run compares
; the two 128-bit halves separately and recombines the mask with vinsertf128.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001365define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
1366; SSE2-LABEL: min_le_v4i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001367; SSE2: # BB#0:
1368; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
1369; SSE2-NEXT: movdqa %xmm3, %xmm4
1370; SSE2-NEXT: pxor %xmm7, %xmm4
1371; SSE2-NEXT: movdqa %xmm1, %xmm5
1372; SSE2-NEXT: pxor %xmm7, %xmm5
1373; SSE2-NEXT: movdqa %xmm5, %xmm6
1374; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
1375; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
1376; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
1377; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
1378; SSE2-NEXT: pand %xmm8, %xmm4
1379; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
1380; SSE2-NEXT: por %xmm4, %xmm8
1381; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
1382; SSE2-NEXT: movdqa %xmm8, %xmm9
1383; SSE2-NEXT: pxor %xmm4, %xmm9
1384; SSE2-NEXT: movdqa %xmm2, %xmm6
1385; SSE2-NEXT: pxor %xmm7, %xmm6
1386; SSE2-NEXT: pxor %xmm0, %xmm7
1387; SSE2-NEXT: movdqa %xmm7, %xmm5
1388; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
1389; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
1390; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
1391; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
1392; SSE2-NEXT: pand %xmm10, %xmm6
1393; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1394; SSE2-NEXT: por %xmm6, %xmm5
1395; SSE2-NEXT: pxor %xmm5, %xmm4
1396; SSE2-NEXT: pandn %xmm0, %xmm5
1397; SSE2-NEXT: pandn %xmm2, %xmm4
1398; SSE2-NEXT: por %xmm5, %xmm4
1399; SSE2-NEXT: pandn %xmm1, %xmm8
1400; SSE2-NEXT: pandn %xmm3, %xmm9
1401; SSE2-NEXT: por %xmm8, %xmm9
1402; SSE2-NEXT: movdqa %xmm4, %xmm0
1403; SSE2-NEXT: movdqa %xmm9, %xmm1
1404; SSE2-NEXT: retq
1405;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001406; SSE41-LABEL: min_le_v4i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001407; SSE41: # BB#0:
1408; SSE41-NEXT: movdqa %xmm0, %xmm8
1409; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
1410; SSE41-NEXT: movdqa %xmm3, %xmm5
1411; SSE41-NEXT: pxor %xmm0, %xmm5
1412; SSE41-NEXT: movdqa %xmm1, %xmm6
1413; SSE41-NEXT: pxor %xmm0, %xmm6
1414; SSE41-NEXT: movdqa %xmm6, %xmm7
1415; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
1416; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
1417; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
1418; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1419; SSE41-NEXT: pand %xmm4, %xmm6
1420; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1421; SSE41-NEXT: por %xmm6, %xmm5
1422; SSE41-NEXT: pcmpeqd %xmm9, %xmm9
1423; SSE41-NEXT: pxor %xmm9, %xmm5
1424; SSE41-NEXT: movdqa %xmm2, %xmm6
1425; SSE41-NEXT: pxor %xmm0, %xmm6
1426; SSE41-NEXT: pxor %xmm8, %xmm0
1427; SSE41-NEXT: movdqa %xmm0, %xmm7
1428; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
1429; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
1430; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
1431; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1432; SSE41-NEXT: pand %xmm4, %xmm6
1433; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
1434; SSE41-NEXT: por %xmm6, %xmm0
1435; SSE41-NEXT: pxor %xmm9, %xmm0
1436; SSE41-NEXT: blendvpd %xmm8, %xmm2
1437; SSE41-NEXT: movdqa %xmm5, %xmm0
1438; SSE41-NEXT: blendvpd %xmm1, %xmm3
1439; SSE41-NEXT: movapd %xmm2, %xmm0
1440; SSE41-NEXT: movapd %xmm3, %xmm1
1441; SSE41-NEXT: retq
1442;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001443; SSE42-LABEL: min_le_v4i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001444; SSE42: # BB#0:
1445; SSE42-NEXT: movdqa %xmm0, %xmm4
1446; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
1447; SSE42-NEXT: movdqa %xmm3, %xmm6
1448; SSE42-NEXT: pxor %xmm0, %xmm6
1449; SSE42-NEXT: movdqa %xmm1, %xmm5
1450; SSE42-NEXT: pxor %xmm0, %xmm5
1451; SSE42-NEXT: pcmpgtq %xmm6, %xmm5
1452; SSE42-NEXT: pcmpeqd %xmm6, %xmm6
1453; SSE42-NEXT: pxor %xmm6, %xmm5
1454; SSE42-NEXT: movdqa %xmm2, %xmm7
1455; SSE42-NEXT: pxor %xmm0, %xmm7
1456; SSE42-NEXT: pxor %xmm4, %xmm0
1457; SSE42-NEXT: pcmpgtq %xmm7, %xmm0
1458; SSE42-NEXT: pxor %xmm6, %xmm0
1459; SSE42-NEXT: blendvpd %xmm4, %xmm2
1460; SSE42-NEXT: movdqa %xmm5, %xmm0
1461; SSE42-NEXT: blendvpd %xmm1, %xmm3
1462; SSE42-NEXT: movapd %xmm2, %xmm0
1463; SSE42-NEXT: movapd %xmm3, %xmm1
1464; SSE42-NEXT: retq
1465;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001466; AVX1-LABEL: min_le_v4i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001467; AVX1: # BB#0:
1468; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1469; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
1470; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
1471; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
1472; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
1473; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
1474; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
1475; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
1476; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm5
1477; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
1478; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
1479; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
1480; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1481; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1482; AVX1-NEXT: retq
1483;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001484; AVX2-LABEL: min_le_v4i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001485; AVX2: # BB#0:
1486; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
1487; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
1488; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
1489; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
1490; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
1491; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
1492; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1493; AVX2-NEXT: retq
1494;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001495; AVX512-LABEL: min_le_v4i64:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001496; AVX512: # BB#0:
1497; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
1498; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm3
1499; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm2
1500; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
1501; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
1502; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
1503; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1504; AVX512-NEXT: retq
; IR under test: per-lane mask of %a <=u %b, then pick %a where set, else %b.
1505 %1 = icmp ule <4 x i64> %a, %b
1506 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
1507 ret <4 x i64> %2
1508}
1509
; min_le_v4i32 - lane-wise unsigned minimum of two <4 x i32> vectors, written
; as icmp ule + select.
; The SSE2 run expects an emulated sequence: build a "%a >u %b" mask (XOR with
; 2147483648, signed pcmpgtd), form its complement by XOR with an all-ones
; register (pcmpeqd reg,reg), then combine via pandn/pandn/por.
; The SSE4.1, SSE4.2 and AVX runs expect a single pminud / vpminud, the same
; instruction the ult variant of this test expects.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001510define <4 x i32> @min_le_v4i32(<4 x i32> %a, <4 x i32> %b) {
1511; SSE2-LABEL: min_le_v4i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001512; SSE2: # BB#0:
1513; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
1514; SSE2-NEXT: movdqa %xmm1, %xmm2
1515; SSE2-NEXT: pxor %xmm3, %xmm2
1516; SSE2-NEXT: pxor %xmm0, %xmm3
1517; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1518; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
1519; SSE2-NEXT: pxor %xmm3, %xmm2
1520; SSE2-NEXT: pandn %xmm0, %xmm3
1521; SSE2-NEXT: pandn %xmm1, %xmm2
1522; SSE2-NEXT: por %xmm3, %xmm2
1523; SSE2-NEXT: movdqa %xmm2, %xmm0
1524; SSE2-NEXT: retq
1525;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001526; SSE41-LABEL: min_le_v4i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001527; SSE41: # BB#0:
1528; SSE41-NEXT: pminud %xmm1, %xmm0
1529; SSE41-NEXT: retq
1530;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001531; SSE42-LABEL: min_le_v4i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001532; SSE42: # BB#0:
1533; SSE42-NEXT: pminud %xmm1, %xmm0
1534; SSE42-NEXT: retq
1535;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001536; AVX-LABEL: min_le_v4i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001537; AVX: # BB#0:
1538; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
1539; AVX-NEXT: retq
; IR under test: per-lane mask of %a <=u %b, then pick %a where set, else %b.
1540 %1 = icmp ule <4 x i32> %a, %b
1541 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
1542 ret <4 x i32> %2
1543}
1544
; min_le_v8i32 - lane-wise unsigned minimum of two <8 x i32> vectors, written
; as icmp ule + select.
; The SSE runs work on two xmm halves. SSE2 expects, per half, a "%a >u %b"
; mask (XOR with 2147483648, signed pcmpgtd) plus its complement (XOR with an
; all-ones register), combined via pandn/pandn/por; it uses xmm8 as scratch.
; SSE4.1 and SSE4.2 expect one pminud per half. The AVX1 run expects two xmm
; vpminud around vextractf128/vinsertf128; AVX2 and AVX512 expect a single
; ymm vpminud.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001545define <8 x i32> @min_le_v8i32(<8 x i32> %a, <8 x i32> %b) {
1546; SSE2-LABEL: min_le_v8i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001547; SSE2: # BB#0:
1548; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
1549; SSE2-NEXT: movdqa %xmm3, %xmm4
1550; SSE2-NEXT: pxor %xmm6, %xmm4
1551; SSE2-NEXT: movdqa %xmm1, %xmm7
1552; SSE2-NEXT: pxor %xmm6, %xmm7
1553; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
1554; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
1555; SSE2-NEXT: movdqa %xmm7, %xmm5
1556; SSE2-NEXT: pxor %xmm4, %xmm5
1557; SSE2-NEXT: movdqa %xmm2, %xmm8
1558; SSE2-NEXT: pxor %xmm6, %xmm8
1559; SSE2-NEXT: pxor %xmm0, %xmm6
1560; SSE2-NEXT: pcmpgtd %xmm8, %xmm6
1561; SSE2-NEXT: pxor %xmm6, %xmm4
1562; SSE2-NEXT: pandn %xmm0, %xmm6
1563; SSE2-NEXT: pandn %xmm2, %xmm4
1564; SSE2-NEXT: por %xmm6, %xmm4
1565; SSE2-NEXT: pandn %xmm1, %xmm7
1566; SSE2-NEXT: pandn %xmm3, %xmm5
1567; SSE2-NEXT: por %xmm7, %xmm5
1568; SSE2-NEXT: movdqa %xmm4, %xmm0
1569; SSE2-NEXT: movdqa %xmm5, %xmm1
1570; SSE2-NEXT: retq
1571;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001572; SSE41-LABEL: min_le_v8i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001573; SSE41: # BB#0:
1574; SSE41-NEXT: pminud %xmm2, %xmm0
1575; SSE41-NEXT: pminud %xmm3, %xmm1
1576; SSE41-NEXT: retq
1577;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001578; SSE42-LABEL: min_le_v8i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001579; SSE42: # BB#0:
1580; SSE42-NEXT: pminud %xmm2, %xmm0
1581; SSE42-NEXT: pminud %xmm3, %xmm1
1582; SSE42-NEXT: retq
1583;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001584; AVX1-LABEL: min_le_v8i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001585; AVX1: # BB#0:
1586; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1587; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1588; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1589; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
1590; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1591; AVX1-NEXT: retq
1592;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001593; AVX2-LABEL: min_le_v8i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001594; AVX2: # BB#0:
1595; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
1596; AVX2-NEXT: retq
1597;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001598; AVX512-LABEL: min_le_v8i32:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001599; AVX512: # BB#0:
1600; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
1601; AVX512-NEXT: retq
; IR under test: per-lane mask of %a <=u %b, then pick %a where set, else %b.
1602 %1 = icmp ule <8 x i32> %a, %b
1603 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
1604 ret <8 x i32> %2
1605}
1606
; min_le_v8i16 - lane-wise unsigned minimum of two <8 x i16> vectors, written
; as icmp ule + select.
; The SSE2 run expects a different emulation from the ult variant: psubusw
; computes the unsigned saturating difference %a - %b, and pcmpeqw against a
; zeroed register then yields the "%a <=u %b" mask (the saturated difference
; is zero exactly in those lanes); pand/pandn/por does the select.
; The SSE4.1, SSE4.2 and AVX runs expect a single pminuw / vpminuw.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001607define <8 x i16> @min_le_v8i16(<8 x i16> %a, <8 x i16> %b) {
1608; SSE2-LABEL: min_le_v8i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001609; SSE2: # BB#0:
1610; SSE2-NEXT: movdqa %xmm0, %xmm2
1611; SSE2-NEXT: psubusw %xmm1, %xmm2
1612; SSE2-NEXT: pxor %xmm3, %xmm3
1613; SSE2-NEXT: pcmpeqw %xmm2, %xmm3
1614; SSE2-NEXT: pand %xmm3, %xmm0
1615; SSE2-NEXT: pandn %xmm1, %xmm3
1616; SSE2-NEXT: por %xmm3, %xmm0
1617; SSE2-NEXT: retq
1618;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001619; SSE41-LABEL: min_le_v8i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001620; SSE41: # BB#0:
1621; SSE41-NEXT: pminuw %xmm1, %xmm0
1622; SSE41-NEXT: retq
1623;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001624; SSE42-LABEL: min_le_v8i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001625; SSE42: # BB#0:
1626; SSE42-NEXT: pminuw %xmm1, %xmm0
1627; SSE42-NEXT: retq
1628;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001629; AVX-LABEL: min_le_v8i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001630; AVX: # BB#0:
1631; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1632; AVX-NEXT: retq
; IR under test: per-lane mask of %a <=u %b, then pick %a where set, else %b.
1633 %1 = icmp ule <8 x i16> %a, %b
1634 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
1635 ret <8 x i16> %2
1636}
1637
; min_le_v16i16 - lane-wise unsigned minimum of two <16 x i16> vectors,
; written as icmp ule + select.
; The SSE runs work on two xmm halves. SSE2 expects, per half, psubusw
; (unsigned saturating %a - %b) followed by pcmpeqw against a shared zeroed
; register to form the "%a <=u %b" mask, then pand/pandn/por; the results are
; moved back into xmm0/xmm1 at the end.
; SSE4.1 and SSE4.2 expect one pminuw per half. The AVX1 run expects two xmm
; vpminuw around vextractf128/vinsertf128; AVX2 and AVX512 expect a single
; ymm vpminuw.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001638define <16 x i16> @min_le_v16i16(<16 x i16> %a, <16 x i16> %b) {
1639; SSE2-LABEL: min_le_v16i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001640; SSE2: # BB#0:
1641; SSE2-NEXT: movdqa %xmm1, %xmm4
1642; SSE2-NEXT: psubusw %xmm3, %xmm4
1643; SSE2-NEXT: pxor %xmm6, %xmm6
1644; SSE2-NEXT: pcmpeqw %xmm6, %xmm4
1645; SSE2-NEXT: movdqa %xmm0, %xmm5
1646; SSE2-NEXT: psubusw %xmm2, %xmm5
1647; SSE2-NEXT: pcmpeqw %xmm6, %xmm5
1648; SSE2-NEXT: pand %xmm5, %xmm0
1649; SSE2-NEXT: pandn %xmm2, %xmm5
1650; SSE2-NEXT: por %xmm0, %xmm5
1651; SSE2-NEXT: pand %xmm4, %xmm1
1652; SSE2-NEXT: pandn %xmm3, %xmm4
1653; SSE2-NEXT: por %xmm1, %xmm4
1654; SSE2-NEXT: movdqa %xmm5, %xmm0
1655; SSE2-NEXT: movdqa %xmm4, %xmm1
1656; SSE2-NEXT: retq
1657;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001658; SSE41-LABEL: min_le_v16i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001659; SSE41: # BB#0:
1660; SSE41-NEXT: pminuw %xmm2, %xmm0
1661; SSE41-NEXT: pminuw %xmm3, %xmm1
1662; SSE41-NEXT: retq
1663;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001664; SSE42-LABEL: min_le_v16i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001665; SSE42: # BB#0:
1666; SSE42-NEXT: pminuw %xmm2, %xmm0
1667; SSE42-NEXT: pminuw %xmm3, %xmm1
1668; SSE42-NEXT: retq
1669;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001670; AVX1-LABEL: min_le_v16i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001671; AVX1: # BB#0:
1672; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1673; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1674; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
1675; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1676; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1677; AVX1-NEXT: retq
1678;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001679; AVX2-LABEL: min_le_v16i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001680; AVX2: # BB#0:
1681; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1682; AVX2-NEXT: retq
1683;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001684; AVX512-LABEL: min_le_v16i16:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001685; AVX512: # BB#0:
1686; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1687; AVX512-NEXT: retq
; IR under test: per-lane mask of %a <=u %b, then pick %a where set, else %b.
1688 %1 = icmp ule <16 x i16> %a, %b
1689 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
1690 ret <16 x i16> %2
1691}
1692
; min_le_v16i8 - lane-wise unsigned minimum of two <16 x i8> vectors, written
; as icmp ule + select.
; Every SSE run shares one expectation (a single pminub) and every AVX run
; shares another (a single vpminub), so only the two common prefixes are used.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001693define <16 x i8> @min_le_v16i8(<16 x i8> %a, <16 x i8> %b) {
1694; SSE-LABEL: min_le_v16i8:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001695; SSE: # BB#0:
1696; SSE-NEXT: pminub %xmm1, %xmm0
1697; SSE-NEXT: retq
1698;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001699; AVX-LABEL: min_le_v16i8:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001700; AVX: # BB#0:
1701; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
1702; AVX-NEXT: retq
; IR under test: per-lane mask of %a <=u %b, then pick %a where set, else %b.
1703 %1 = icmp ule <16 x i8> %a, %b
1704 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
1705 ret <16 x i8> %2
1706}
1707
; Unsigned minimum of two <32 x i8> vectors, written as icmp ule + select.
; Expected lowering per run line:
;  - SSE checks expect two 128-bit pminub (one per half of the 256-bit value).
;  - AVX1 checks expect the upper halves to be extracted with vextractf128,
;    two xmm vpminub, and the halves rejoined with vinsertf128.
;  - AVX2 and AVX512 checks expect a single ymm vpminub.
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001708define <32 x i8> @min_le_v32i8(<32 x i8> %a, <32 x i8> %b) {
1709; SSE-LABEL: min_le_v32i8:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001710; SSE: # BB#0:
1711; SSE-NEXT: pminub %xmm2, %xmm0
1712; SSE-NEXT: pminub %xmm3, %xmm1
1713; SSE-NEXT: retq
1714;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001715; AVX1-LABEL: min_le_v32i8:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001716; AVX1: # BB#0:
1717; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1718; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1719; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
1720; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
1721; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1722; AVX1-NEXT: retq
1723;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001724; AVX2-LABEL: min_le_v32i8:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001725; AVX2: # BB#0:
1726; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
1727; AVX2-NEXT: retq
1728;
Simon Pilgrim1fdc1772015-08-14 11:03:31 +00001729; AVX512-LABEL: min_le_v32i8:
Simon Pilgrim829091e2015-08-13 20:31:03 +00001730; AVX512: # BB#0:
1731; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
1732; AVX512-NEXT: retq
1733 %1 = icmp ule <32 x i8> %a, %b
1734 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
1735 ret <32 x i8> %2
1736}
Simon Pilgrim19ffd572015-08-18 08:52:43 +00001737
1738;
1739; Constant Folding
1740;
1741
; Unsigned maximum of two constant <2 x i64> vectors (icmp ugt + select).
; Each insertelement re-inserts the value already present in lane 0, so the
; effective operands are <-7, 7> and <-1, 1>.
; The checks for every run line still contain a compare followed by a
; blend / and-andn-or select, i.e. the result is not emitted as one folded
; constant. The SSE2 and SSE41 checks build the 64-bit unsigned compare from
; 32-bit pcmpgtd/pcmpeqd on sign-bit-flipped inputs; the SSE42 and AVX checks
; use pcmpgtq / vpcmpgtq.
1742define <2 x i64> @max_gt_v2i64c() {
1743; SSE2-LABEL: max_gt_v2i64c:
1744; SSE2: # BB#0:
1745; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7]
1746; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1]
1747; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
1748; SSE2-NEXT: movdqa %xmm0, %xmm3
1749; SSE2-NEXT: pxor %xmm2, %xmm3
1750; SSE2-NEXT: pxor %xmm1, %xmm0
1751; SSE2-NEXT: movdqa %xmm0, %xmm4
1752; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1753; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1754; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
1755; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1756; SSE2-NEXT: pand %xmm5, %xmm3
1757; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1758; SSE2-NEXT: por %xmm3, %xmm0
1759; SSE2-NEXT: movdqa %xmm0, %xmm3
1760; SSE2-NEXT: pandn %xmm2, %xmm3
1761; SSE2-NEXT: pand %xmm1, %xmm0
1762; SSE2-NEXT: por %xmm3, %xmm0
1763; SSE2-NEXT: retq
1764;
1765; SSE41-LABEL: max_gt_v2i64c:
1766; SSE41: # BB#0:
1767; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
1768; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
1769; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
1770; SSE41-NEXT: movdqa %xmm0, %xmm3
1771; SSE41-NEXT: pxor %xmm1, %xmm3
1772; SSE41-NEXT: pxor %xmm2, %xmm0
1773; SSE41-NEXT: movdqa %xmm0, %xmm4
1774; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
1775; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1776; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
1777; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1778; SSE41-NEXT: pand %xmm5, %xmm3
1779; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1780; SSE41-NEXT: por %xmm3, %xmm0
1781; SSE41-NEXT: blendvpd %xmm2, %xmm1
1782; SSE41-NEXT: movapd %xmm1, %xmm0
1783; SSE41-NEXT: retq
1784;
1785; SSE42-LABEL: max_gt_v2i64c:
1786; SSE42: # BB#0:
1787; SSE42-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551615,1]
1788; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775801,9223372036854775815]
1789; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
1790; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
1791; SSE42-NEXT: movapd %xmm1, %xmm0
1792; SSE42-NEXT: retq
1793;
1794; AVX-LABEL: max_gt_v2i64c:
1795; AVX: # BB#0:
1796; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [18446744073709551615,1]
1797; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775801,9223372036854775815]
1798; AVX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
1799; AVX-NEXT: vblendvpd %xmm1, {{.*}}(%rip), %xmm0, %xmm0
1800; AVX-NEXT: retq
1801 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1802 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1803 %3 = icmp ugt <2 x i64> %1, %2
1804 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
1805 ret <2 x i64> %4
1806}
1807
; Unsigned maximum of two constant <4 x i64> vectors (icmp ugt + select).
; Each insertelement re-inserts the value already present in lane 0, so the
; effective operands are <-7, -1, 1, 7> and <-1, -7, 7, 1>.
; Expected lowering per run line:
;  - SSE2 and SSE41 checks handle the two 128-bit halves separately, building
;    each 64-bit unsigned compare from pcmpgtd/pcmpeqd on sign-bit-flipped
;    inputs; SSE2 selects with pand/pandn/por, SSE41 with blendvpd.
;  - SSE42 checks use two pcmpgtq and two blendvpd.
;  - AVX1 checks compare the halves in xmm registers, join the masks with
;    vinsertf128 and select with one ymm vblendvpd.
;  - AVX2 and AVX512 checks use one ymm vpcmpgtq and one ymm vblendvpd.
1808define <4 x i64> @max_gt_v4i64c() {
1809; SSE2-LABEL: max_gt_v4i64c:
1810; SSE2: # BB#0:
1811; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615]
1812; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
1813; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
1814; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,1]
1815; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
1816; SSE2-NEXT: movdqa %xmm0, %xmm1
1817; SSE2-NEXT: pxor %xmm3, %xmm1
1818; SSE2-NEXT: movdqa %xmm0, %xmm6
1819; SSE2-NEXT: pxor %xmm8, %xmm6
1820; SSE2-NEXT: movdqa %xmm6, %xmm7
1821; SSE2-NEXT: pcmpgtd %xmm1, %xmm7
1822; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
1823; SSE2-NEXT: pcmpeqd %xmm1, %xmm6
1824; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1825; SSE2-NEXT: pand %xmm2, %xmm6
1826; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
1827; SSE2-NEXT: por %xmm6, %xmm1
1828; SSE2-NEXT: movdqa %xmm0, %xmm2
1829; SSE2-NEXT: pxor %xmm5, %xmm2
1830; SSE2-NEXT: pxor %xmm4, %xmm0
1831; SSE2-NEXT: movdqa %xmm0, %xmm6
1832; SSE2-NEXT: pcmpgtd %xmm2, %xmm6
1833; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1834; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
1835; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1836; SSE2-NEXT: pand %xmm7, %xmm2
1837; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
1838; SSE2-NEXT: por %xmm2, %xmm0
1839; SSE2-NEXT: movdqa %xmm0, %xmm2
1840; SSE2-NEXT: pandn %xmm5, %xmm2
1841; SSE2-NEXT: pand %xmm4, %xmm0
1842; SSE2-NEXT: por %xmm2, %xmm0
1843; SSE2-NEXT: movdqa %xmm1, %xmm2
1844; SSE2-NEXT: pandn %xmm3, %xmm2
1845; SSE2-NEXT: pand %xmm8, %xmm1
1846; SSE2-NEXT: por %xmm2, %xmm1
1847; SSE2-NEXT: retq
1848;
1849; SSE41-LABEL: max_gt_v4i64c:
1850; SSE41: # BB#0:
1851; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551609,18446744073709551615]
1852; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
1853; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
1854; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
1855; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
1856; SSE41-NEXT: movdqa %xmm0, %xmm3
1857; SSE41-NEXT: pxor %xmm1, %xmm3
1858; SSE41-NEXT: movdqa %xmm0, %xmm6
1859; SSE41-NEXT: pxor %xmm8, %xmm6
1860; SSE41-NEXT: movdqa %xmm6, %xmm7
1861; SSE41-NEXT: pcmpgtd %xmm3, %xmm7
1862; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
1863; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
1864; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1865; SSE41-NEXT: pand %xmm4, %xmm6
1866; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
1867; SSE41-NEXT: por %xmm6, %xmm3
1868; SSE41-NEXT: movdqa %xmm0, %xmm4
1869; SSE41-NEXT: pxor %xmm2, %xmm4
1870; SSE41-NEXT: pxor %xmm5, %xmm0
1871; SSE41-NEXT: movdqa %xmm0, %xmm6
1872; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
1873; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1874; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
1875; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1876; SSE41-NEXT: pand %xmm7, %xmm4
1877; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
1878; SSE41-NEXT: por %xmm4, %xmm0
1879; SSE41-NEXT: blendvpd %xmm5, %xmm2
1880; SSE41-NEXT: movdqa %xmm3, %xmm0
1881; SSE41-NEXT: blendvpd %xmm8, %xmm1
1882; SSE41-NEXT: movapd %xmm2, %xmm0
1883; SSE41-NEXT: retq
1884;
1885; SSE42-LABEL: max_gt_v4i64c:
1886; SSE42: # BB#0:
1887; SSE42-NEXT: movapd {{.*#+}} xmm1 = [7,1]
1888; SSE42-NEXT: movapd {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
1889; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775809,9223372036854775815]
1890; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm3
1891; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775801,9223372036854775807]
1892; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
1893; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm2
1894; SSE42-NEXT: movdqa %xmm3, %xmm0
1895; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
1896; SSE42-NEXT: movapd %xmm2, %xmm0
1897; SSE42-NEXT: retq
1898;
1899; AVX1-LABEL: max_gt_v4i64c:
1900; AVX1: # BB#0:
1901; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
1902; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775801,9223372036854775807]
1903; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
1904; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775809,9223372036854775815]
1905; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm2, %xmm2
1906; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1907; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
1908; AVX1-NEXT: retq
1909;
1910; AVX2-LABEL: max_gt_v4i64c:
1911; AVX2: # BB#0:
1912; AVX2-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
1913; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775801,9223372036854775807,9223372036854775809,9223372036854775815]
1914; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
1915; AVX2-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
1916; AVX2-NEXT: retq
1917;
1918; AVX512-LABEL: max_gt_v4i64c:
1919; AVX512: # BB#0:
1920; AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
1921; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775801,9223372036854775807,9223372036854775809,9223372036854775815]
1922; AVX512-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
1923; AVX512-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
1924; AVX512-NEXT: retq
1925 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1926 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1927 %3 = icmp ugt <4 x i64> %1, %2
1928 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
1929 ret <4 x i64> %4
1930}
1931
; Unsigned maximum of two constant <4 x i32> vectors (icmp ugt + select).
; %1 re-inserts -7 into lane 0, so it stays <-7, -1, 1, 7>. %2 overwrites
; lane 0 (-1) with 1, so the second operand is <1, -7, 7, 1>.
; NOTE(review): the i64 constant tests in this file re-insert -1 into lane 0
; of %2; the 1 used here may be a copy/paste inconsistency - confirm intent,
; and regenerate the checks if the IR is ever changed.
; The SSE2 checks expect a signed pcmpgtd on sign-bit-flipped constants
; followed by a pand/pandn/por select. The SSE41, SSE42 and AVX checks expect
; a single pmaxud / vpmaxud with a RIP-relative memory operand.
1932define <4 x i32> @max_gt_v4i32c() {
1933; SSE2-LABEL: max_gt_v4i32c:
1934; SSE2: # BB#0:
1935; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483641,2147483647,2147483649,2147483655]
1936; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
1937; SSE2-NEXT: movdqa %xmm0, %xmm1
1938; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
1939; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
1940; SSE2-NEXT: por %xmm1, %xmm0
1941; SSE2-NEXT: retq
1942;
1943; SSE41-LABEL: max_gt_v4i32c:
1944; SSE41: # BB#0:
1945; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
1946; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0
1947; SSE41-NEXT: retq
1948;
1949; SSE42-LABEL: max_gt_v4i32c:
1950; SSE42: # BB#0:
1951; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
1952; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0
1953; SSE42-NEXT: retq
1954;
1955; AVX-LABEL: max_gt_v4i32c:
1956; AVX: # BB#0:
1957; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
1958; AVX-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
1959; AVX-NEXT: retq
1960 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1961 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 1, i32 0
1962 %3 = icmp ugt <4 x i32> %1, %2
1963 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
1964 ret <4 x i32> %4
1965}
1966
; Unsigned maximum of two constant <8 x i32> vectors (icmp ugt + select).
; %1 re-inserts -7 into lane 0 and is unchanged. %2 overwrites lane 0 (-1)
; with 1, so the second operand is <1, -3, -5, -7, 7, 5, 3, 1>.
; NOTE(review): the i64 constant tests in this file re-insert -1 into lane 0
; of %2; the 1 used here may be a copy/paste inconsistency - confirm intent.
; Expected lowering per run line:
;  - SSE2 checks expect, per 128-bit half, a signed pcmpgtd on
;    sign-bit-flipped constants and a pand/pandn/por select.
;  - SSE41 and SSE42 checks expect two pmaxud with memory operands.
;  - AVX1 checks expect two xmm vpmaxud joined by vinsertf128.
;  - AVX2 and AVX512 checks expect one ymm vpmaxud.
1967define <8 x i32> @max_gt_v8i32c() {
1968; SSE2-LABEL: max_gt_v8i32c:
1969; SSE2: # BB#0:
1970; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483649,2147483651,2147483653,2147483655]
1971; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1
1972; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483641,2147483643,2147483645,2147483647]
1973; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
1974; SSE2-NEXT: movdqa %xmm0, %xmm2
1975; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
1976; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
1977; SSE2-NEXT: por %xmm2, %xmm0
1978; SSE2-NEXT: movdqa %xmm1, %xmm2
1979; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
1980; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
1981; SSE2-NEXT: por %xmm2, %xmm1
1982; SSE2-NEXT: retq
1983;
1984; SSE41-LABEL: max_gt_v8i32c:
1985; SSE41: # BB#0:
1986; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
1987; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
1988; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0
1989; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm1
1990; SSE41-NEXT: retq
1991;
1992; SSE42-LABEL: max_gt_v8i32c:
1993; SSE42: # BB#0:
1994; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
1995; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
1996; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0
1997; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm1
1998; SSE42-NEXT: retq
1999;
2000; AVX1-LABEL: max_gt_v8i32c:
2001; AVX1: # BB#0:
2002; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2003; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
2004; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
2005; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm1, %xmm1
2006; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2007; AVX1-NEXT: retq
2008;
2009; AVX2-LABEL: max_gt_v8i32c:
2010; AVX2: # BB#0:
2011; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2012; AVX2-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0
2013; AVX2-NEXT: retq
2014;
2015; AVX512-LABEL: max_gt_v8i32c:
2016; AVX512: # BB#0:
2017; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2018; AVX512-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0
2019; AVX512-NEXT: retq
2020 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
2021 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 1, i32 0
2022 %3 = icmp ugt <8 x i32> %1, %2
2023 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
2024 ret <8 x i32> %4
2025}
2026
; Unsigned maximum of two constant <8 x i16> vectors (icmp ugt + select).
; %1 re-inserts -7 into lane 0 and is unchanged. %2 overwrites lane 0 (-1)
; with 1, so the second operand is <1, -3, -5, -7, 7, 5, 3, 1>. Note the
; insertelement index operand is typed i16 here.
; NOTE(review): the i64 constant tests in this file re-insert -1 into lane 0
; of %2; the 1 used here may be a copy/paste inconsistency - confirm intent.
; The SSE2 checks expect a signed pcmpgtw on sign-bit-flipped constants
; followed by a pand/pandn/por select. The SSE41, SSE42 and AVX checks expect
; a single pmaxuw / vpmaxuw with a RIP-relative memory operand.
2027define <8 x i16> @max_gt_v8i16c() {
2028; SSE2-LABEL: max_gt_v8i16c:
2029; SSE2: # BB#0:
2030; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32761,32763,32765,32767,32769,32771,32773,32775]
2031; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0
2032; SSE2-NEXT: movdqa %xmm0, %xmm1
2033; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
2034; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2035; SSE2-NEXT: por %xmm1, %xmm0
2036; SSE2-NEXT: retq
2037;
2038; SSE41-LABEL: max_gt_v8i16c:
2039; SSE41: # BB#0:
2040; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2041; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2042; SSE41-NEXT: retq
2043;
2044; SSE42-LABEL: max_gt_v8i16c:
2045; SSE42: # BB#0:
2046; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2047; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2048; SSE42-NEXT: retq
2049;
2050; AVX-LABEL: max_gt_v8i16c:
2051; AVX: # BB#0:
2052; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2053; AVX-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
2054; AVX-NEXT: retq
2055 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i16 0
2056 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i16 0
2057 %3 = icmp ugt <8 x i16> %1, %2
2058 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
2059 ret <8 x i16> %4
2060}
2061
; Unsigned maximum of two constant <16 x i16> vectors (icmp ugt + select).
; %1 re-inserts -7 into lane 0 and is unchanged (<-7 .. 8>). %2 overwrites
; lane 0 (-1) with 1, so it becomes
; <1, -2, -3, -4, -5, -6, -7, 0, 7, 6, 5, 4, 3, 2, 1, 0>.
; NOTE(review): the i64 constant tests in this file re-insert -1 into lane 0
; of %2; the 1 used here may be a copy/paste inconsistency - confirm intent.
; Expected lowering per run line:
;  - SSE2 checks expect, per 128-bit half, a signed pcmpgtw on
;    sign-bit-flipped constants and a pand/pandn/por select.
;  - SSE41 and SSE42 checks expect two pmaxuw with memory operands.
;  - AVX1 checks expect two xmm vpmaxuw joined by vinsertf128.
;  - AVX2 and AVX512 checks expect one ymm vpmaxuw.
2062define <16 x i16> @max_gt_v16i16c() {
2063; SSE2-LABEL: max_gt_v16i16c:
2064; SSE2: # BB#0:
2065; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32769,32770,32771,32772,32773,32774,32775,32776]
2066; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm1
2067; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32761,32762,32763,32764,32765,32766,32767,32768]
2068; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0
2069; SSE2-NEXT: movdqa %xmm0, %xmm2
2070; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2071; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2072; SSE2-NEXT: por %xmm2, %xmm0
2073; SSE2-NEXT: movdqa %xmm1, %xmm2
2074; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2075; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
2076; SSE2-NEXT: por %xmm2, %xmm1
2077; SSE2-NEXT: retq
2078;
2079; SSE41-LABEL: max_gt_v16i16c:
2080; SSE41: # BB#0:
2081; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2082; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2083; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2084; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm1
2085; SSE41-NEXT: retq
2086;
2087; SSE42-LABEL: max_gt_v16i16c:
2088; SSE42: # BB#0:
2089; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2090; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2091; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2092; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm1
2093; SSE42-NEXT: retq
2094;
2095; AVX1-LABEL: max_gt_v16i16c:
2096; AVX1: # BB#0:
2097; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2098; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
2099; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2100; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm1, %xmm1
2101; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2102; AVX1-NEXT: retq
2103;
2104; AVX2-LABEL: max_gt_v16i16c:
2105; AVX2: # BB#0:
2106; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2107; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
2108; AVX2-NEXT: retq
2109;
2110; AVX512-LABEL: max_gt_v16i16c:
2111; AVX512: # BB#0:
2112; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2113; AVX512-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
2114; AVX512-NEXT: retq
2115 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i16 0
2116 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i16 0
2117 %3 = icmp ugt <16 x i16> %1, %2
2118 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
2119 ret <16 x i16> %4
2120}
2121
; Unsigned maximum of two constant <16 x i8> vectors (icmp ugt + select).
; %1 re-inserts -7 into lane 0 and is unchanged (<-7 .. 8>). %2 overwrites
; lane 0 (-1) with 1; the insertelement index operand is typed i8 here.
; NOTE(review): the i64 constant tests in this file re-insert -1 into lane 0
; of %2; the 1 used here may be a copy/paste inconsistency - confirm intent.
; All SSE and AVX run lines are expected to load %1 as a constant and apply a
; single pmaxub / vpmaxub with a RIP-relative memory operand.
2122define <16 x i8> @max_gt_v16i8c() {
2123; SSE-LABEL: max_gt_v16i8c:
2124; SSE: # BB#0:
2125; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2126; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm0
2127; SSE-NEXT: retq
2128;
2129; AVX-LABEL: max_gt_v16i8c:
2130; AVX: # BB#0:
2131; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2132; AVX-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm0
2133; AVX-NEXT: retq
2134 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i8 0
2135 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i8 0
2136 %3 = icmp ugt <16 x i8> %1, %2
2137 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
2138 ret <16 x i8> %4
2139}
2140
; Unsigned maximum of two constant <2 x i64> vectors using icmp uge + select.
; Each insertelement re-inserts the value already present in lane 0, so the
; effective operands are <-7, 7> and <-1, 1>.
; In every run line's checks the greater-than compare result is inverted
; before the select: an all-ones register is materialised with
; pcmpeqd reg,reg (vpcmpeqd on AVX) and XORed with the mask.
; The SSE2 and SSE41 checks build the 64-bit compare from pcmpgtd/pcmpeqd on
; sign-bit-flipped inputs; the SSE42 and AVX checks use pcmpgtq / vpcmpgtq.
2141define <2 x i64> @max_ge_v2i64c() {
2142; SSE2-LABEL: max_ge_v2i64c:
2143; SSE2: # BB#0:
2144; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7]
2145; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1]
2146; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2147; SSE2-NEXT: movdqa %xmm0, %xmm3
2148; SSE2-NEXT: pxor %xmm1, %xmm3
2149; SSE2-NEXT: pxor %xmm2, %xmm0
2150; SSE2-NEXT: movdqa %xmm0, %xmm4
2151; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
2152; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2153; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
2154; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2155; SSE2-NEXT: pand %xmm5, %xmm0
2156; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2157; SSE2-NEXT: por %xmm0, %xmm3
2158; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
2159; SSE2-NEXT: pxor %xmm3, %xmm0
2160; SSE2-NEXT: pandn %xmm1, %xmm3
2161; SSE2-NEXT: pandn %xmm2, %xmm0
2162; SSE2-NEXT: por %xmm3, %xmm0
2163; SSE2-NEXT: retq
2164;
2165; SSE41-LABEL: max_ge_v2i64c:
2166; SSE41: # BB#0:
2167; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
2168; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
2169; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2170; SSE41-NEXT: movdqa %xmm0, %xmm3
2171; SSE41-NEXT: pxor %xmm2, %xmm3
2172; SSE41-NEXT: pxor %xmm1, %xmm0
2173; SSE41-NEXT: movdqa %xmm0, %xmm4
2174; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
2175; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2176; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
2177; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2178; SSE41-NEXT: pand %xmm5, %xmm0
2179; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2180; SSE41-NEXT: por %xmm0, %xmm3
2181; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
2182; SSE41-NEXT: pxor %xmm3, %xmm0
2183; SSE41-NEXT: blendvpd %xmm2, %xmm1
2184; SSE41-NEXT: movapd %xmm1, %xmm0
2185; SSE41-NEXT: retq
2186;
2187; SSE42-LABEL: max_ge_v2i64c:
2188; SSE42: # BB#0:
2189; SSE42-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551615,1]
2190; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775807,9223372036854775809]
2191; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm2
2192; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
2193; SSE42-NEXT: pxor %xmm2, %xmm0
2194; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
2195; SSE42-NEXT: movapd %xmm1, %xmm0
2196; SSE42-NEXT: retq
2197;
2198; AVX-LABEL: max_ge_v2i64c:
2199; AVX: # BB#0:
2200; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [18446744073709551615,1]
2201; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775807,9223372036854775809]
2202; AVX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
2203; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
2204; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
2205; AVX-NEXT: vblendvpd %xmm1, {{.*}}(%rip), %xmm0, %xmm0
2206; AVX-NEXT: retq
2207 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
2208 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
2209 %3 = icmp uge <2 x i64> %1, %2
2210 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
2211 ret <2 x i64> %4
2212}
2213
; Unsigned maximum of two constant <4 x i64> vectors using icmp uge + select.
; Each insertelement re-inserts the value already present in lane 0, so the
; effective operands are <-7, -1, 1, 7> and <-1, -7, 7, 1>.
; In every run line's checks the greater-than compare result is inverted
; with an all-ones register (pcmpeqd reg,reg / vpcmpeqd) and pxor / vpxor
; before the select.
; Expected lowering per run line:
;  - SSE2 and SSE41 checks handle the two 128-bit halves separately using
;    pcmpgtd/pcmpeqd on sign-bit-flipped inputs; SSE2 selects with
;    pandn/pandn/por, SSE41 with blendvpd.
;  - SSE42 checks use two pcmpgtq and two blendvpd.
;  - AVX1 checks compare and invert each half in xmm registers, join the
;    masks with vinsertf128 and select with one ymm vblendvpd.
;  - AVX2 and AVX512 checks use one ymm vpcmpgtq, vpxor and vblendvpd.
2214define <4 x i64> @max_ge_v4i64c() {
2215; SSE2-LABEL: max_ge_v4i64c:
2216; SSE2: # BB#0:
2217; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [18446744073709551609,18446744073709551615]
2218; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
2219; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
2220; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [7,1]
2221; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
2222; SSE2-NEXT: movdqa %xmm7, %xmm0
2223; SSE2-NEXT: pxor %xmm8, %xmm0
2224; SSE2-NEXT: movdqa %xmm7, %xmm1
2225; SSE2-NEXT: pxor %xmm9, %xmm1
2226; SSE2-NEXT: movdqa %xmm1, %xmm6
2227; SSE2-NEXT: pcmpgtd %xmm0, %xmm6
2228; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,0,2,2]
2229; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
2230; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2231; SSE2-NEXT: pand %xmm2, %xmm0
2232; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2233; SSE2-NEXT: por %xmm0, %xmm6
2234; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
2235; SSE2-NEXT: movdqa %xmm6, %xmm1
2236; SSE2-NEXT: pxor %xmm0, %xmm1
2237; SSE2-NEXT: movdqa %xmm7, %xmm2
2238; SSE2-NEXT: pxor %xmm10, %xmm2
2239; SSE2-NEXT: pxor %xmm5, %xmm7
2240; SSE2-NEXT: movdqa %xmm7, %xmm3
2241; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
2242; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2243; SSE2-NEXT: pcmpeqd %xmm2, %xmm7
2244; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
2245; SSE2-NEXT: pand %xmm4, %xmm2
2246; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2247; SSE2-NEXT: por %xmm2, %xmm3
2248; SSE2-NEXT: pxor %xmm3, %xmm0
2249; SSE2-NEXT: pandn %xmm10, %xmm3
2250; SSE2-NEXT: pandn %xmm5, %xmm0
2251; SSE2-NEXT: por %xmm3, %xmm0
2252; SSE2-NEXT: pandn %xmm8, %xmm6
2253; SSE2-NEXT: pandn %xmm9, %xmm1
2254; SSE2-NEXT: por %xmm6, %xmm1
2255; SSE2-NEXT: retq
2256;
2257; SSE41-LABEL: max_ge_v4i64c:
2258; SSE41: # BB#0:
2259; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [18446744073709551609,18446744073709551615]
2260; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
2261; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
2262; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
2263; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2264; SSE41-NEXT: movdqa %xmm0, %xmm3
2265; SSE41-NEXT: pxor %xmm8, %xmm3
2266; SSE41-NEXT: movdqa %xmm0, %xmm6
2267; SSE41-NEXT: pxor %xmm1, %xmm6
2268; SSE41-NEXT: movdqa %xmm6, %xmm7
2269; SSE41-NEXT: pcmpgtd %xmm3, %xmm7
2270; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
2271; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
2272; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2273; SSE41-NEXT: pand %xmm4, %xmm6
2274; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
2275; SSE41-NEXT: por %xmm6, %xmm3
2276; SSE41-NEXT: pcmpeqd %xmm4, %xmm4
2277; SSE41-NEXT: pxor %xmm4, %xmm3
2278; SSE41-NEXT: movdqa %xmm0, %xmm6
2279; SSE41-NEXT: pxor %xmm9, %xmm6
2280; SSE41-NEXT: pxor %xmm2, %xmm0
2281; SSE41-NEXT: movdqa %xmm0, %xmm7
2282; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
2283; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
2284; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
2285; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2286; SSE41-NEXT: pand %xmm5, %xmm6
2287; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
2288; SSE41-NEXT: por %xmm6, %xmm0
2289; SSE41-NEXT: pxor %xmm4, %xmm0
2290; SSE41-NEXT: blendvpd %xmm9, %xmm2
2291; SSE41-NEXT: movdqa %xmm3, %xmm0
2292; SSE41-NEXT: blendvpd %xmm8, %xmm1
2293; SSE41-NEXT: movapd %xmm2, %xmm0
2294; SSE41-NEXT: retq
2295;
2296; SSE42-LABEL: max_ge_v4i64c:
2297; SSE42: # BB#0:
2298; SSE42-NEXT: movapd {{.*#+}} xmm1 = [7,1]
2299; SSE42-NEXT: movapd {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
2300; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775815,9223372036854775809]
2301; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm3
2302; SSE42-NEXT: pcmpeqd %xmm4, %xmm4
2303; SSE42-NEXT: pxor %xmm4, %xmm3
2304; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775807,9223372036854775801]
2305; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
2306; SSE42-NEXT: pxor %xmm4, %xmm0
2307; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm2
2308; SSE42-NEXT: movdqa %xmm3, %xmm0
2309; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
2310; SSE42-NEXT: movapd %xmm2, %xmm0
2311; SSE42-NEXT: retq
2312;
2313; AVX1-LABEL: max_ge_v4i64c:
2314; AVX1: # BB#0:
2315; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2316; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775815,9223372036854775809]
2317; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
2318; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
2319; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
2320; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775807,9223372036854775801]
2321; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm3, %xmm3
2322; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm2
2323; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2324; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2325; AVX1-NEXT: retq
2326;
2327; AVX2-LABEL: max_ge_v4i64c:
2328; AVX2: # BB#0:
2329; AVX2-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2330; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775807,9223372036854775801,9223372036854775815,9223372036854775809]
2331; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
2332; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
2333; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
2334; AVX2-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2335; AVX2-NEXT: retq
2336;
2337; AVX512-LABEL: max_ge_v4i64c:
2338; AVX512: # BB#0:
2339; AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2340; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775807,9223372036854775801,9223372036854775815,9223372036854775809]
2341; AVX512-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
2342; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
2343; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1
2344; AVX512-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2345; AVX512-NEXT: retq
2346 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
2347 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
2348 %3 = icmp uge <4 x i64> %1, %2
2349 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
2350 ret <4 x i64> %4
2351}
2352
; Unsigned maximum of two constant <4 x i32> vectors using icmp uge + select.
; %1 re-inserts -7 into lane 0, so it stays <-7, -1, 1, 7>. %2 overwrites
; lane 0 (-1) with 1, so the second operand is <1, -7, 7, 1>; the SSE2
; constant below is that vector with the sign bit of each lane flipped.
; NOTE(review): the i64 constant tests in this file re-insert -1 into lane 0
; of %2; the 1 used here may be a copy/paste inconsistency - confirm intent,
; and regenerate the checks if the IR is ever changed.
; The SSE2 checks expect a signed pcmpgtd, an inversion of the mask via an
; all-ones register (pcmpeqd reg,reg) and pxor, then a pandn/pandn/por
; select. The SSE41, SSE42 and AVX checks expect a single pmaxud / vpmaxud.
2353define <4 x i32> @max_ge_v4i32c() {
2354; SSE2-LABEL: max_ge_v4i32c:
2355; SSE2: # BB#0:
2356; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483649,2147483641,2147483655,2147483649]
2357; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
2358; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
2359; SSE2-NEXT: pxor %xmm0, %xmm1
2360; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
2361; SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
2362; SSE2-NEXT: por %xmm1, %xmm0
2363; SSE2-NEXT: retq
2364;
2365; SSE41-LABEL: max_ge_v4i32c:
2366; SSE41: # BB#0:
2367; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2368; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0
2369; SSE41-NEXT: retq
2370;
2371; SSE42-LABEL: max_ge_v4i32c:
2372; SSE42: # BB#0:
2373; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2374; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0
2375; SSE42-NEXT: retq
2376;
2377; AVX-LABEL: max_ge_v4i32c:
2378; AVX: # BB#0:
2379; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2380; AVX-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
2381; AVX-NEXT: retq
2382 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
2383 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 1, i32 0
2384 %3 = icmp uge <4 x i32> %1, %2
2385 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
2386 ret <4 x i32> %4
2387}
2388
; Unsigned maximum of two constant <8 x i32> vectors using icmp uge + select.
; %1 re-inserts -7 into lane 0 and is unchanged. %2 overwrites lane 0 (-1)
; with 1, so the second operand is <1, -3, -5, -7, 7, 5, 3, 1>; the SSE2
; constants below are that vector with the sign bit of each lane flipped.
; NOTE(review): the i64 constant tests in this file re-insert -1 into lane 0
; of %2; the 1 used here may be a copy/paste inconsistency - confirm intent.
; Expected lowering per run line:
;  - SSE2 checks expect, per 128-bit half, a signed pcmpgtd, inversion of
;    the mask with an all-ones register and pxor, then pandn/pandn/por.
;  - SSE41 and SSE42 checks expect two pmaxud with memory operands.
;  - AVX1 checks expect two xmm vpmaxud joined by vinsertf128.
;  - AVX2 and AVX512 checks expect one ymm vpmaxud.
2389define <8 x i32> @max_ge_v8i32c() {
2390; SSE2-LABEL: max_ge_v8i32c:
2391; SSE2: # BB#0:
2392; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483653,2147483651,2147483649]
2393; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1
2394; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
2395; SSE2-NEXT: movdqa %xmm1, %xmm3
2396; SSE2-NEXT: pxor %xmm2, %xmm3
2397; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483649,2147483645,2147483643,2147483641]
2398; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
2399; SSE2-NEXT: pxor %xmm0, %xmm2
2400; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2401; SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
2402; SSE2-NEXT: por %xmm2, %xmm0
2403; SSE2-NEXT: pandn {{.*}}(%rip), %xmm3
2404; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
2405; SSE2-NEXT: por %xmm3, %xmm1
2406; SSE2-NEXT: retq
2407;
2408; SSE41-LABEL: max_ge_v8i32c:
2409; SSE41: # BB#0:
2410; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
2411; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2412; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0
2413; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm1
2414; SSE41-NEXT: retq
2415;
2416; SSE42-LABEL: max_ge_v8i32c:
2417; SSE42: # BB#0:
2418; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
2419; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2420; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0
2421; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm1
2422; SSE42-NEXT: retq
2423;
2424; AVX1-LABEL: max_ge_v8i32c:
2425; AVX1: # BB#0:
2426; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2427; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
2428; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
2429; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm1, %xmm1
2430; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2431; AVX1-NEXT: retq
2432;
2433; AVX2-LABEL: max_ge_v8i32c:
2434; AVX2: # BB#0:
2435; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2436; AVX2-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0
2437; AVX2-NEXT: retq
2438;
2439; AVX512-LABEL: max_ge_v8i32c:
2440; AVX512: # BB#0:
2441; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2442; AVX512-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0
2443; AVX512-NEXT: retq
2444 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
2445 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 1, i32 0
2446 %3 = icmp uge <8 x i32> %1, %2
2447 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
2448 ret <8 x i32> %4
2449}
2450
; Unsigned maximum of two constant <8 x i16> vectors using icmp uge + select.
; %1 re-inserts -7 into lane 0 and is unchanged. %2 overwrites lane 0 (-1)
; with 1, which matches the [1,65533,65531,65529,7,5,3,1] constant in the
; SSE2 checks. The insertelement index operand is typed i16 here.
; NOTE(review): the i64 constant tests in this file re-insert -1 into lane 0
; of %2; the 1 used here may be a copy/paste inconsistency - confirm intent.
; The SSE2 checks expect the uge mask to be formed with an unsigned
; saturating subtract (psubusw) compared against zero with pcmpeqw, followed
; by a pand/pandn/por select. The SSE41, SSE42 and AVX checks expect a
; single pmaxuw / vpmaxuw with a RIP-relative memory operand.
2451define <8 x i16> @max_ge_v8i16c() {
2452; SSE2-LABEL: max_ge_v8i16c:
2453; SSE2: # BB#0:
2454; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65529,65531,65533,65535,1,3,5,7]
2455; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,65533,65531,65529,7,5,3,1]
2456; SSE2-NEXT: movdqa %xmm2, %xmm3
2457; SSE2-NEXT: psubusw %xmm1, %xmm3
2458; SSE2-NEXT: pxor %xmm0, %xmm0
2459; SSE2-NEXT: pcmpeqw %xmm3, %xmm0
2460; SSE2-NEXT: pand %xmm0, %xmm1
2461; SSE2-NEXT: pandn %xmm2, %xmm0
2462; SSE2-NEXT: por %xmm1, %xmm0
2463; SSE2-NEXT: retq
2464;
2465; SSE41-LABEL: max_ge_v8i16c:
2466; SSE41: # BB#0:
2467; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2468; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2469; SSE41-NEXT: retq
2470;
2471; SSE42-LABEL: max_ge_v8i16c:
2472; SSE42: # BB#0:
2473; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2474; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2475; SSE42-NEXT: retq
2476;
2477; AVX-LABEL: max_ge_v8i16c:
2478; AVX: # BB#0:
2479; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2480; AVX-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
2481; AVX-NEXT: retq
2482 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i16 0
2483 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i16 0
2484 %3 = icmp uge <8 x i16> %1, %2
2485 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
2486 ret <8 x i16> %4
2487}
2488
; Unsigned maximum (icmp uge + select) of two constant <16 x i16> vectors.
; %4 takes %1 in the lanes where %1 >= %2 (unsigned) and %2 elsewhere.
; The SSE and AVX1 expectations handle the 256-bit value as two 128-bit
; halves (AVX1 rejoins them with vinsertf128); the AVX2 and AVX512
; expectations use a single ymm vpmaxuw.
; NOTE(review): %2 overwrites lane 0 (-1) with 1, while %1 re-inserts the -7
; already in lane 0 -- possibly a typo for -1. The SSE2 constant [1,65534,...]
; below encodes the current value, so confirm before changing.
2489define <16 x i16> @max_ge_v16i16c() {
2490; SSE2-LABEL: max_ge_v16i16c:
2491; SSE2: # BB#0:
2492; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65529,65530,65531,65532,65533,65534,65535,0]
2493; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,2,3,4,5,6,7,8]
2494; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,65534,65533,65532,65531,65530,65529,0]
2495; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0]
2496; SSE2-NEXT: movdqa %xmm5, %xmm1
2497; SSE2-NEXT: psubusw %xmm3, %xmm1
2498; SSE2-NEXT: pxor %xmm6, %xmm6
2499; SSE2-NEXT: pcmpeqw %xmm6, %xmm1
2500; SSE2-NEXT: movdqa %xmm4, %xmm0
2501; SSE2-NEXT: psubusw %xmm2, %xmm0
2502; SSE2-NEXT: pcmpeqw %xmm6, %xmm0
2503; SSE2-NEXT: pand %xmm0, %xmm2
2504; SSE2-NEXT: pandn %xmm4, %xmm0
2505; SSE2-NEXT: por %xmm2, %xmm0
2506; SSE2-NEXT: pand %xmm1, %xmm3
2507; SSE2-NEXT: pandn %xmm5, %xmm1
2508; SSE2-NEXT: por %xmm3, %xmm1
2509; SSE2-NEXT: retq
2510;
2511; SSE41-LABEL: max_ge_v16i16c:
2512; SSE41: # BB#0:
2513; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2514; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2515; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2516; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm1
2517; SSE41-NEXT: retq
2518;
2519; SSE42-LABEL: max_ge_v16i16c:
2520; SSE42: # BB#0:
2521; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2522; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2523; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0
2524; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm1
2525; SSE42-NEXT: retq
2526;
2527; AVX1-LABEL: max_ge_v16i16c:
2528; AVX1: # BB#0:
2529; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2530; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0
2531; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2532; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm1, %xmm1
2533; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2534; AVX1-NEXT: retq
2535;
2536; AVX2-LABEL: max_ge_v16i16c:
2537; AVX2: # BB#0:
2538; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2539; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
2540; AVX2-NEXT: retq
2541;
2542; AVX512-LABEL: max_ge_v16i16c:
2543; AVX512: # BB#0:
2544; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2545; AVX512-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0
2546; AVX512-NEXT: retq
2547 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i16 0
2548 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i16 0
2549 %3 = icmp uge <16 x i16> %1, %2
2550 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
2551 ret <16 x i16> %4
2552}
2553
; Unsigned maximum (icmp uge + select) of two constant <16 x i8> vectors.
; %4 takes %1 in the lanes where %1 >= %2 (unsigned) and %2 elsewhere.
; All SSE runs share one expectation (a single pmaxub) and all AVX runs share
; one expectation (a single vpmaxub).
; NOTE(review): %2 overwrites lane 0 (-1) with 1, while %1 re-inserts the -7
; already in lane 0 -- possibly a typo for -1; confirm before changing.
2554define <16 x i8> @max_ge_v16i8c() {
2555; SSE-LABEL: max_ge_v16i8c:
2556; SSE: # BB#0:
2557; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2558; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm0
2559; SSE-NEXT: retq
2560;
2561; AVX-LABEL: max_ge_v16i8c:
2562; AVX: # BB#0:
2563; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2564; AVX-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm0
2565; AVX-NEXT: retq
2566 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i8 0
2567 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i8 0
2568 %3 = icmp uge <16 x i8> %1, %2
2569 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
2570 ret <16 x i8> %4
2571}
2572
; Unsigned minimum (icmp ult + select) of two constant <2 x i64> vectors.
; %4 takes %1 in the lanes where %1 < %2 (unsigned) and %2 elsewhere. Both
; insertelement operations write the value lane 0 already holds.
; No expectation uses a single min instruction: the SSE2 and SSE4.1 runs XOR
; both operands with 0x80000000 per 32-bit lane and combine pcmpgtd/pcmpeqd
; results, while the SSE4.2 and AVX runs use (v)pcmpgtq followed by a
; (v)blendvpd.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002573define <2 x i64> @min_lt_v2i64c() {
2574; SSE2-LABEL: min_lt_v2i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002575; SSE2: # BB#0:
2576; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7]
2577; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1]
2578; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2579; SSE2-NEXT: movdqa %xmm0, %xmm3
2580; SSE2-NEXT: pxor %xmm1, %xmm3
2581; SSE2-NEXT: pxor %xmm2, %xmm0
2582; SSE2-NEXT: movdqa %xmm0, %xmm4
2583; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
2584; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2585; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
2586; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2587; SSE2-NEXT: pand %xmm5, %xmm3
2588; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2589; SSE2-NEXT: por %xmm3, %xmm0
2590; SSE2-NEXT: movdqa %xmm0, %xmm3
2591; SSE2-NEXT: pandn %xmm2, %xmm3
2592; SSE2-NEXT: pand %xmm1, %xmm0
2593; SSE2-NEXT: por %xmm3, %xmm0
2594; SSE2-NEXT: retq
2595;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002596; SSE41-LABEL: min_lt_v2i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002597; SSE41: # BB#0:
2598; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
2599; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
2600; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2601; SSE41-NEXT: movdqa %xmm0, %xmm3
2602; SSE41-NEXT: pxor %xmm2, %xmm3
2603; SSE41-NEXT: pxor %xmm1, %xmm0
2604; SSE41-NEXT: movdqa %xmm0, %xmm4
2605; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
2606; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2607; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
2608; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2609; SSE41-NEXT: pand %xmm5, %xmm3
2610; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2611; SSE41-NEXT: por %xmm3, %xmm0
2612; SSE41-NEXT: blendvpd %xmm2, %xmm1
2613; SSE41-NEXT: movapd %xmm1, %xmm0
2614; SSE41-NEXT: retq
2615;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002616; SSE42-LABEL: min_lt_v2i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002617; SSE42: # BB#0:
2618; SSE42-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551615,1]
2619; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775807,9223372036854775809]
2620; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
2621; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
2622; SSE42-NEXT: movapd %xmm1, %xmm0
2623; SSE42-NEXT: retq
2624;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002625; AVX-LABEL: min_lt_v2i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002626; AVX: # BB#0:
2627; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [18446744073709551615,1]
2628; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775807,9223372036854775809]
2629; AVX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
2630; AVX-NEXT: vblendvpd %xmm1, {{.*}}(%rip), %xmm0, %xmm0
2631; AVX-NEXT: retq
2632 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
2633 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
2634 %3 = icmp ult <2 x i64> %1, %2
2635 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
2636 ret <2 x i64> %4
2637}
2638
; Unsigned minimum (icmp ult + select) of two constant <4 x i64> vectors.
; %4 takes %1 in the lanes where %1 < %2 (unsigned) and %2 elsewhere. Both
; insertelement operations write the value lane 0 already holds.
; The SSE expectations return the result as two 128-bit halves in xmm0/xmm1.
; The SSE2 and SSE4.1 runs XOR the operands with 0x80000000 per 32-bit lane
; and combine pcmpgtd/pcmpeqd results; SSE4.2 and AVX1 use two 128-bit
; (v)pcmpgtq; AVX2 and AVX512 use one ymm vpcmpgtq plus vblendvpd.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002639define <4 x i64> @min_lt_v4i64c() {
2640; SSE2-LABEL: min_lt_v4i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002641; SSE2: # BB#0:
2642; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551609,18446744073709551615]
2643; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
2644; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
2645; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,1]
2646; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2647; SSE2-NEXT: movdqa %xmm0, %xmm1
2648; SSE2-NEXT: pxor %xmm8, %xmm1
2649; SSE2-NEXT: movdqa %xmm0, %xmm6
2650; SSE2-NEXT: pxor %xmm3, %xmm6
2651; SSE2-NEXT: movdqa %xmm6, %xmm7
2652; SSE2-NEXT: pcmpgtd %xmm1, %xmm7
2653; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
2654; SSE2-NEXT: pcmpeqd %xmm1, %xmm6
2655; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2656; SSE2-NEXT: pand %xmm2, %xmm6
2657; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
2658; SSE2-NEXT: por %xmm6, %xmm1
2659; SSE2-NEXT: movdqa %xmm0, %xmm2
2660; SSE2-NEXT: pxor %xmm4, %xmm2
2661; SSE2-NEXT: pxor %xmm5, %xmm0
2662; SSE2-NEXT: movdqa %xmm0, %xmm6
2663; SSE2-NEXT: pcmpgtd %xmm2, %xmm6
2664; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2665; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
2666; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
2667; SSE2-NEXT: pand %xmm7, %xmm2
2668; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2669; SSE2-NEXT: por %xmm2, %xmm0
2670; SSE2-NEXT: movdqa %xmm0, %xmm2
2671; SSE2-NEXT: pandn %xmm5, %xmm2
2672; SSE2-NEXT: pand %xmm4, %xmm0
2673; SSE2-NEXT: por %xmm2, %xmm0
2674; SSE2-NEXT: movdqa %xmm1, %xmm2
2675; SSE2-NEXT: pandn %xmm3, %xmm2
2676; SSE2-NEXT: pand %xmm8, %xmm1
2677; SSE2-NEXT: por %xmm2, %xmm1
2678; SSE2-NEXT: retq
2679;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002680; SSE41-LABEL: min_lt_v4i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002681; SSE41: # BB#0:
2682; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551609,18446744073709551615]
2683; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
2684; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
2685; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
2686; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2687; SSE41-NEXT: movdqa %xmm0, %xmm3
2688; SSE41-NEXT: pxor %xmm8, %xmm3
2689; SSE41-NEXT: movdqa %xmm0, %xmm6
2690; SSE41-NEXT: pxor %xmm1, %xmm6
2691; SSE41-NEXT: movdqa %xmm6, %xmm7
2692; SSE41-NEXT: pcmpgtd %xmm3, %xmm7
2693; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
2694; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
2695; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2696; SSE41-NEXT: pand %xmm4, %xmm6
2697; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
2698; SSE41-NEXT: por %xmm6, %xmm3
2699; SSE41-NEXT: movdqa %xmm0, %xmm4
2700; SSE41-NEXT: pxor %xmm5, %xmm4
2701; SSE41-NEXT: pxor %xmm2, %xmm0
2702; SSE41-NEXT: movdqa %xmm0, %xmm6
2703; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
2704; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2705; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
2706; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2707; SSE41-NEXT: pand %xmm7, %xmm4
2708; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2709; SSE41-NEXT: por %xmm4, %xmm0
2710; SSE41-NEXT: blendvpd %xmm5, %xmm2
2711; SSE41-NEXT: movdqa %xmm3, %xmm0
2712; SSE41-NEXT: blendvpd %xmm8, %xmm1
2713; SSE41-NEXT: movapd %xmm2, %xmm0
2714; SSE41-NEXT: retq
2715;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002716; SSE42-LABEL: min_lt_v4i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002717; SSE42: # BB#0:
2718; SSE42-NEXT: movapd {{.*#+}} xmm1 = [7,1]
2719; SSE42-NEXT: movapd {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
2720; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775815,9223372036854775809]
2721; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm3
2722; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775807,9223372036854775801]
2723; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
2724; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm2
2725; SSE42-NEXT: movdqa %xmm3, %xmm0
2726; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
2727; SSE42-NEXT: movapd %xmm2, %xmm0
2728; SSE42-NEXT: retq
2729;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002730; AVX1-LABEL: min_lt_v4i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002731; AVX1: # BB#0:
2732; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2733; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775807,9223372036854775801]
2734; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
2735; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775815,9223372036854775809]
2736; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm2, %xmm2
2737; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2738; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2739; AVX1-NEXT: retq
2740;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002741; AVX2-LABEL: min_lt_v4i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002742; AVX2: # BB#0:
2743; AVX2-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2744; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775807,9223372036854775801,9223372036854775815,9223372036854775809]
2745; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
2746; AVX2-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2747; AVX2-NEXT: retq
2748;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002749; AVX512-LABEL: min_lt_v4i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002750; AVX512: # BB#0:
2751; AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
2752; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775807,9223372036854775801,9223372036854775815,9223372036854775809]
2753; AVX512-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
2754; AVX512-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
2755; AVX512-NEXT: retq
2756 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
2757 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
2758 %3 = icmp ult <4 x i64> %1, %2
2759 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
2760 ret <4 x i64> %4
2761}
2762
; Unsigned minimum (icmp ult + select) of two constant <4 x i32> vectors.
; %4 takes %1 in the lanes where %1 < %2 (unsigned) and %2 elsewhere.
; The SSE2 expectation compares with signed pcmpgtd and merges with
; pandn/pand/por; the SSE4.1, SSE4.2 and AVX expectations use one (v)pminud.
; NOTE(review): %2 overwrites lane 0 (-1) with 1, while %1 re-inserts the -7
; already in lane 0 -- possibly a typo for -1. The SSE2 constant
; [2147483649,...] below (1 XOR 0x80000000) encodes the current value, so
; confirm before changing.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002763define <4 x i32> @min_lt_v4i32c() {
2764; SSE2-LABEL: min_lt_v4i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002765; SSE2: # BB#0:
2766; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483649,2147483641,2147483655,2147483649]
2767; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
2768; SSE2-NEXT: movdqa %xmm0, %xmm1
2769; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
2770; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2771; SSE2-NEXT: por %xmm1, %xmm0
2772; SSE2-NEXT: retq
2773;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002774; SSE41-LABEL: min_lt_v4i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002775; SSE41: # BB#0:
2776; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2777; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
2778; SSE41-NEXT: retq
2779;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002780; SSE42-LABEL: min_lt_v4i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002781; SSE42: # BB#0:
2782; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2783; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0
2784; SSE42-NEXT: retq
2785;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002786; AVX-LABEL: min_lt_v4i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002787; AVX: # BB#0:
2788; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
2789; AVX-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
2790; AVX-NEXT: retq
2791 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
2792 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 1, i32 0
2793 %3 = icmp ult <4 x i32> %1, %2
2794 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
2795 ret <4 x i32> %4
2796}
2797
; Unsigned minimum (icmp ult + select) of two constant <8 x i32> vectors.
; %4 takes %1 in the lanes where %1 < %2 (unsigned) and %2 elsewhere.
; The SSE and AVX1 expectations process two 128-bit halves (AVX1 rejoins them
; with vinsertf128); the AVX2 and AVX512 expectations use one ymm vpminud.
; The SSE2 run has no pminud and uses pcmpgtd with pandn/pand/por instead.
; NOTE(review): %2 overwrites lane 0 (-1) with 1, while %1 re-inserts the -7
; already in lane 0 -- possibly a typo for -1. The SSE2 constant
; [2147483649,2147483645,...] below encodes the current value, so confirm
; before changing.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002798define <8 x i32> @min_lt_v8i32c() {
2799; SSE2-LABEL: min_lt_v8i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002800; SSE2: # BB#0:
2801; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483653,2147483651,2147483649]
2802; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1
2803; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483649,2147483645,2147483643,2147483641]
2804; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
2805; SSE2-NEXT: movdqa %xmm0, %xmm2
2806; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2807; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2808; SSE2-NEXT: por %xmm2, %xmm0
2809; SSE2-NEXT: movdqa %xmm1, %xmm2
2810; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2811; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
2812; SSE2-NEXT: por %xmm2, %xmm1
2813; SSE2-NEXT: retq
2814;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002815; SSE41-LABEL: min_lt_v8i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002816; SSE41: # BB#0:
2817; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
2818; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2819; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
2820; SSE41-NEXT: pminud {{.*}}(%rip), %xmm1
2821; SSE41-NEXT: retq
2822;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002823; SSE42-LABEL: min_lt_v8i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002824; SSE42: # BB#0:
2825; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
2826; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2827; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0
2828; SSE42-NEXT: pminud {{.*}}(%rip), %xmm1
2829; SSE42-NEXT: retq
2830;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002831; AVX1-LABEL: min_lt_v8i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002832; AVX1: # BB#0:
2833; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
2834; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
2835; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
2836; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm1, %xmm1
2837; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2838; AVX1-NEXT: retq
2839;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002840; AVX2-LABEL: min_lt_v8i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002841; AVX2: # BB#0:
2842; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2843; AVX2-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0
2844; AVX2-NEXT: retq
2845;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002846; AVX512-LABEL: min_lt_v8i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002847; AVX512: # BB#0:
2848; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
2849; AVX512-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0
2850; AVX512-NEXT: retq
2851 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
2852 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 1, i32 0
2853 %3 = icmp ult <8 x i32> %1, %2
2854 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
2855 ret <8 x i32> %4
2856}
2857
; Unsigned minimum (icmp ult + select) of two constant <8 x i16> vectors.
; %4 takes %1 in the lanes where %1 < %2 (unsigned) and %2 elsewhere.
; The SSE2 expectation builds a mask with psubusw/pcmpeqw and merges with
; pand/pandn/por; the SSE4.1, SSE4.2 and AVX expectations use one (v)pminuw.
; NOTE(review): %2 overwrites lane 0 (-1) with 1, while %1 re-inserts the -7
; already in lane 0 -- possibly a typo for -1; confirm before changing.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002858define <8 x i16> @min_lt_v8i16c() {
2859; SSE2-LABEL: min_lt_v8i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002860; SSE2: # BB#0:
2861; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65529,65531,65533,65535,1,3,5,7]
2862; SSE2-NEXT: movdqa %xmm1, %xmm2
2863; SSE2-NEXT: psubusw {{.*}}(%rip), %xmm2
2864; SSE2-NEXT: pxor %xmm0, %xmm0
2865; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
2866; SSE2-NEXT: pand %xmm0, %xmm1
2867; SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
2868; SSE2-NEXT: por %xmm1, %xmm0
2869; SSE2-NEXT: retq
2870;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002871; SSE41-LABEL: min_lt_v8i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002872; SSE41: # BB#0:
2873; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2874; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
2875; SSE41-NEXT: retq
2876;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002877; SSE42-LABEL: min_lt_v8i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002878; SSE42: # BB#0:
2879; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2880; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0
2881; SSE42-NEXT: retq
2882;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002883; AVX-LABEL: min_lt_v8i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002884; AVX: # BB#0:
2885; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
2886; AVX-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
2887; AVX-NEXT: retq
2888 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i16 0
2889 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i16 0
2890 %3 = icmp ult <8 x i16> %1, %2
2891 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
2892 ret <8 x i16> %4
2893}
2894
; Unsigned minimum (icmp ult + select) of two constant <16 x i16> vectors.
; %4 takes %1 in the lanes where %1 < %2 (unsigned) and %2 elsewhere.
; The SSE and AVX1 expectations process two 128-bit halves (AVX1 rejoins them
; with vinsertf128); the AVX2 and AVX512 expectations use one ymm vpminuw.
; The SSE2 run has no pminuw and uses signed pcmpgtw with pandn/pand/por.
; NOTE(review): %2 overwrites lane 0 (-1) with 1, while %1 re-inserts the -7
; already in lane 0 -- possibly a typo for -1. The SSE2 constant
; [32769,32766,...] below encodes the current value, so confirm before
; changing.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002895define <16 x i16> @min_lt_v16i16c() {
2896; SSE2-LABEL: min_lt_v16i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002897; SSE2: # BB#0:
2898; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32775,32774,32773,32772,32771,32770,32769,32768]
2899; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm1
2900; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32769,32766,32765,32764,32763,32762,32761,32768]
2901; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0
2902; SSE2-NEXT: movdqa %xmm0, %xmm2
2903; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2904; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
2905; SSE2-NEXT: por %xmm2, %xmm0
2906; SSE2-NEXT: movdqa %xmm1, %xmm2
2907; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
2908; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
2909; SSE2-NEXT: por %xmm2, %xmm1
2910; SSE2-NEXT: retq
2911;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002912; SSE41-LABEL: min_lt_v16i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002913; SSE41: # BB#0:
2914; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2915; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2916; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
2917; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm1
2918; SSE41-NEXT: retq
2919;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002920; SSE42-LABEL: min_lt_v16i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002921; SSE42: # BB#0:
2922; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2923; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2924; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0
2925; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm1
2926; SSE42-NEXT: retq
2927;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002928; AVX1-LABEL: min_lt_v16i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002929; AVX1: # BB#0:
2930; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
2931; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
2932; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
2933; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm1, %xmm1
2934; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2935; AVX1-NEXT: retq
2936;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002937; AVX2-LABEL: min_lt_v16i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002938; AVX2: # BB#0:
2939; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2940; AVX2-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
2941; AVX2-NEXT: retq
2942;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002943; AVX512-LABEL: min_lt_v16i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002944; AVX512: # BB#0:
2945; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
2946; AVX512-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
2947; AVX512-NEXT: retq
2948 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i16 0
2949 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i16 0
2950 %3 = icmp ult <16 x i16> %1, %2
2951 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
2952 ret <16 x i16> %4
2953}
2954
; Unsigned minimum (icmp ult + select) of two constant <16 x i8> vectors.
; %4 takes %1 in the lanes where %1 < %2 (unsigned) and %2 elsewhere.
; All SSE runs share one expectation (a single pminub) and all AVX runs share
; one expectation (a single vpminub).
; NOTE(review): %2 overwrites lane 0 (-1) with 1, while %1 re-inserts the -7
; already in lane 0 -- possibly a typo for -1; confirm before changing.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002955define <16 x i8> @min_lt_v16i8c() {
2956; SSE-LABEL: min_lt_v16i8c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002957; SSE: # BB#0:
2958; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2959; SSE-NEXT: pminub {{.*}}(%rip), %xmm0
2960; SSE-NEXT: retq
2961;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002962; AVX-LABEL: min_lt_v16i8c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002963; AVX: # BB#0:
2964; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
2965; AVX-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm0
2966; AVX-NEXT: retq
2967 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i8 0
2968 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i8 0
2969 %3 = icmp ult <16 x i8> %1, %2
2970 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
2971 ret <16 x i8> %4
2972}
2973
; Unsigned minimum (icmp ule + select) of two constant <2 x i64> vectors.
; %4 takes %1 in the lanes where %1 <= %2 (unsigned) and %2 elsewhere. Both
; insertelement operations write the value lane 0 already holds.
; Every expectation computes a greater-than mask and then inverts it by
; XORing with an all-ones register produced by (v)pcmpeqd reg,reg. The SSE2
; and SSE4.1 runs build the mask from pcmpgtd/pcmpeqd after XORing the
; operands with 0x80000000 per 32-bit lane; SSE4.2 and AVX use (v)pcmpgtq.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002974define <2 x i64> @min_le_v2i64c() {
2975; SSE2-LABEL: min_le_v2i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002976; SSE2: # BB#0:
2977; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551609,7]
2978; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,1]
2979; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
2980; SSE2-NEXT: movdqa %xmm0, %xmm3
2981; SSE2-NEXT: pxor %xmm2, %xmm3
2982; SSE2-NEXT: pxor %xmm1, %xmm0
2983; SSE2-NEXT: movdqa %xmm0, %xmm4
2984; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
2985; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2986; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
2987; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2988; SSE2-NEXT: pand %xmm5, %xmm0
2989; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2990; SSE2-NEXT: por %xmm0, %xmm3
2991; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
2992; SSE2-NEXT: pxor %xmm3, %xmm0
2993; SSE2-NEXT: pandn %xmm1, %xmm3
2994; SSE2-NEXT: pandn %xmm2, %xmm0
2995; SSE2-NEXT: por %xmm3, %xmm0
2996; SSE2-NEXT: retq
2997;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00002998; SSE41-LABEL: min_le_v2i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00002999; SSE41: # BB#0:
3000; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551609,7]
3001; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551615,1]
3002; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
3003; SSE41-NEXT: movdqa %xmm0, %xmm3
3004; SSE41-NEXT: pxor %xmm1, %xmm3
3005; SSE41-NEXT: pxor %xmm2, %xmm0
3006; SSE41-NEXT: movdqa %xmm0, %xmm4
3007; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
3008; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
3009; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
3010; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3011; SSE41-NEXT: pand %xmm5, %xmm0
3012; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
3013; SSE41-NEXT: por %xmm0, %xmm3
3014; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
3015; SSE41-NEXT: pxor %xmm3, %xmm0
3016; SSE41-NEXT: blendvpd %xmm2, %xmm1
3017; SSE41-NEXT: movapd %xmm1, %xmm0
3018; SSE41-NEXT: retq
3019;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003020; SSE42-LABEL: min_le_v2i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003021; SSE42: # BB#0:
3022; SSE42-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551615,1]
3023; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775801,9223372036854775815]
3024; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm2
3025; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
3026; SSE42-NEXT: pxor %xmm2, %xmm0
3027; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
3028; SSE42-NEXT: movapd %xmm1, %xmm0
3029; SSE42-NEXT: retq
3030;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003031; AVX-LABEL: min_le_v2i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003032; AVX: # BB#0:
3033; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [18446744073709551615,1]
3034; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775801,9223372036854775815]
3035; AVX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
3036; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
3037; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
3038; AVX-NEXT: vblendvpd %xmm1, {{.*}}(%rip), %xmm0, %xmm0
3039; AVX-NEXT: retq
3040 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
3041 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
3042 %3 = icmp ule <2 x i64> %1, %2
3043 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
3044 ret <2 x i64> %4
3045}
3046
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003047define <4 x i64> @min_le_v4i64c() {
3048; SSE2-LABEL: min_le_v4i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003049; SSE2: # BB#0:
3050; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [18446744073709551609,18446744073709551615]
3051; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
3052; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744073709551615,18446744073709551609]
3053; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [7,1]
3054; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
3055; SSE2-NEXT: movdqa %xmm7, %xmm0
3056; SSE2-NEXT: pxor %xmm9, %xmm0
3057; SSE2-NEXT: movdqa %xmm7, %xmm1
3058; SSE2-NEXT: pxor %xmm8, %xmm1
3059; SSE2-NEXT: movdqa %xmm1, %xmm6
3060; SSE2-NEXT: pcmpgtd %xmm0, %xmm6
3061; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,0,2,2]
3062; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
3063; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
3064; SSE2-NEXT: pand %xmm2, %xmm0
3065; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
3066; SSE2-NEXT: por %xmm0, %xmm6
3067; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
3068; SSE2-NEXT: movdqa %xmm6, %xmm1
3069; SSE2-NEXT: pxor %xmm0, %xmm1
3070; SSE2-NEXT: movdqa %xmm7, %xmm2
3071; SSE2-NEXT: pxor %xmm5, %xmm2
3072; SSE2-NEXT: pxor %xmm10, %xmm7
3073; SSE2-NEXT: movdqa %xmm7, %xmm3
3074; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
3075; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
3076; SSE2-NEXT: pcmpeqd %xmm2, %xmm7
3077; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
3078; SSE2-NEXT: pand %xmm4, %xmm2
3079; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3080; SSE2-NEXT: por %xmm2, %xmm3
3081; SSE2-NEXT: pxor %xmm3, %xmm0
3082; SSE2-NEXT: pandn %xmm10, %xmm3
3083; SSE2-NEXT: pandn %xmm5, %xmm0
3084; SSE2-NEXT: por %xmm3, %xmm0
3085; SSE2-NEXT: pandn %xmm8, %xmm6
3086; SSE2-NEXT: pandn %xmm9, %xmm1
3087; SSE2-NEXT: por %xmm6, %xmm1
3088; SSE2-NEXT: retq
3089;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003090; SSE41-LABEL: min_le_v4i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003091; SSE41: # BB#0:
3092; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [18446744073709551609,18446744073709551615]
3093; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [1,7]
3094; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
3095; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7,1]
3096; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
3097; SSE41-NEXT: movdqa %xmm0, %xmm3
3098; SSE41-NEXT: pxor %xmm1, %xmm3
3099; SSE41-NEXT: movdqa %xmm0, %xmm6
3100; SSE41-NEXT: pxor %xmm8, %xmm6
3101; SSE41-NEXT: movdqa %xmm6, %xmm7
3102; SSE41-NEXT: pcmpgtd %xmm3, %xmm7
3103; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
3104; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
3105; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
3106; SSE41-NEXT: pand %xmm4, %xmm6
3107; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
3108; SSE41-NEXT: por %xmm6, %xmm3
3109; SSE41-NEXT: pcmpeqd %xmm4, %xmm4
3110; SSE41-NEXT: pxor %xmm4, %xmm3
3111; SSE41-NEXT: movdqa %xmm0, %xmm6
3112; SSE41-NEXT: pxor %xmm2, %xmm6
3113; SSE41-NEXT: pxor %xmm9, %xmm0
3114; SSE41-NEXT: movdqa %xmm0, %xmm7
3115; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
3116; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
3117; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
3118; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
3119; SSE41-NEXT: pand %xmm5, %xmm6
3120; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
3121; SSE41-NEXT: por %xmm6, %xmm0
3122; SSE41-NEXT: pxor %xmm4, %xmm0
3123; SSE41-NEXT: blendvpd %xmm9, %xmm2
3124; SSE41-NEXT: movdqa %xmm3, %xmm0
3125; SSE41-NEXT: blendvpd %xmm8, %xmm1
3126; SSE41-NEXT: movapd %xmm2, %xmm0
3127; SSE41-NEXT: retq
3128;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003129; SSE42-LABEL: min_le_v4i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003130; SSE42: # BB#0:
3131; SSE42-NEXT: movapd {{.*#+}} xmm1 = [7,1]
3132; SSE42-NEXT: movapd {{.*#+}} xmm2 = [18446744073709551615,18446744073709551609]
3133; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775809,9223372036854775815]
3134; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm3
3135; SSE42-NEXT: pcmpeqd %xmm4, %xmm4
3136; SSE42-NEXT: pxor %xmm4, %xmm3
3137; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775801,9223372036854775807]
3138; SSE42-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
3139; SSE42-NEXT: pxor %xmm4, %xmm0
3140; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm2
3141; SSE42-NEXT: movdqa %xmm3, %xmm0
3142; SSE42-NEXT: blendvpd {{.*}}(%rip), %xmm1
3143; SSE42-NEXT: movapd %xmm2, %xmm0
3144; SSE42-NEXT: retq
3145;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003146; AVX1-LABEL: min_le_v4i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003147; AVX1: # BB#0:
3148; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
3149; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775809,9223372036854775815]
3150; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm1, %xmm1
3151; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
3152; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
3153; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775801,9223372036854775807]
3154; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm3, %xmm3
3155; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm2
3156; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
3157; AVX1-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
3158; AVX1-NEXT: retq
3159;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003160; AVX2-LABEL: min_le_v4i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003161; AVX2: # BB#0:
3162; AVX2-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
3163; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775801,9223372036854775807,9223372036854775809,9223372036854775815]
3164; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
3165; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
3166; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
3167; AVX2-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
3168; AVX2-NEXT: retq
3169;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003170; AVX512-LABEL: min_le_v4i64c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003171; AVX512: # BB#0:
3172; AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551609,7,1]
3173; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [9223372036854775801,9223372036854775807,9223372036854775809,9223372036854775815]
3174; AVX512-NEXT: vpcmpgtq {{.*}}(%rip), %ymm1, %ymm1
3175; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
3176; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1
3177; AVX512-NEXT: vblendvpd %ymm1, {{.*}}(%rip), %ymm0, %ymm0
3178; AVX512-NEXT: retq
3179 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
3180 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
3181 %3 = icmp ule <4 x i64> %1, %2
3182 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
3183 ret <4 x i64> %4
3184}
3185
; Unsigned minimum (icmp ule + select) of two constant <4 x i32> operands.
; The function takes no arguments; both operands are built from literal
; vectors in the body below.
; Expected lowering per run line: the run with only SSE2 enabled expands the
; min into pcmpgtd / pxor / pandn / por, while the SSE4.1, SSE4.2 and AVX runs
; expect a single pminud / vpminud against a constant-pool operand.
; The first SSE2 literal equals the %1 lanes with bit 31 flipped
; (4294967289 ^ 2147483648 = 2147483641), presumably so that the signed
; pcmpgtd orders the lanes as unsigned values.
; NOTE(review): the insertelement wrappers presumably exist so the operands are
; not spelled as plain literals; confirm the intent against the introducing commit.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003186define <4 x i32> @min_le_v4i32c() {
3187; SSE2-LABEL: min_le_v4i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003188; SSE2: # BB#0:
3189; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483641,2147483647,2147483649,2147483655]
3190; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
3191; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
3192; SSE2-NEXT: pxor %xmm0, %xmm1
3193; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
3194; SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
3195; SSE2-NEXT: por %xmm1, %xmm0
3196; SSE2-NEXT: retq
3197;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003198; SSE41-LABEL: min_le_v4i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003199; SSE41: # BB#0:
3200; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
3201; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
3202; SSE41-NEXT: retq
3203;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003204; SSE42-LABEL: min_le_v4i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003205; SSE42: # BB#0:
3206; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
3207; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0
3208; SSE42-NEXT: retq
3209;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003210; AVX-LABEL: min_le_v4i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003211; AVX: # BB#0:
3212; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7]
3213; AVX-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
3214; AVX-NEXT: retq
; First operand: writing -7 into lane 0 leaves the literal unchanged, since
; lane 0 already holds -7.
3215 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
; Second operand: lane 0 is overwritten, so -1 becomes 1.
3216 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 1, i32 0
; Lane-wise unsigned "less than or equal" of the first operand against the second.
3217 %3 = icmp ule <4 x i32> %1, %2
; Keep the %1 lane where the compare holds, otherwise the %2 lane (unsigned min).
3218 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
3219 ret <4 x i32> %4
3220}
3221
; Unsigned minimum (icmp ule + select) of two constant <8 x i32> operands.
; The function takes no arguments; both operands are built from literal
; vectors in the body below.
; Expected lowering per run line:
;   - only SSE2 enabled: the 256-bit value is handled as two 128-bit halves
;     (results in xmm0 and xmm1), each expanded into pcmpgtd / pxor / pandn / por;
;   - SSE4.1 and SSE4.2: one pminud per 128-bit half;
;   - AVX1: two 128-bit vpminud results joined with vinsertf128;
;   - AVX2 and AVX512: a single 256-bit vpminud.
; The SSE2 literals equal the %1 lanes with bit 31 flipped
; (e.g. 4294967289 ^ 2147483648 = 2147483641), presumably so that the signed
; pcmpgtd orders the lanes as unsigned values.
; NOTE(review): the insertelement wrappers presumably exist so the operands are
; not spelled as plain literals; confirm the intent against the introducing commit.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003222define <8 x i32> @min_le_v8i32c() {
3223; SSE2-LABEL: min_le_v8i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003224; SSE2: # BB#0:
3225; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483649,2147483651,2147483653,2147483655]
3226; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1
3227; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
3228; SSE2-NEXT: movdqa %xmm1, %xmm3
3229; SSE2-NEXT: pxor %xmm2, %xmm3
3230; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483641,2147483643,2147483645,2147483647]
3231; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
3232; SSE2-NEXT: pxor %xmm0, %xmm2
3233; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
3234; SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
3235; SSE2-NEXT: por %xmm2, %xmm0
3236; SSE2-NEXT: pandn {{.*}}(%rip), %xmm3
3237; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
3238; SSE2-NEXT: por %xmm3, %xmm1
3239; SSE2-NEXT: retq
3240;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003241; SSE41-LABEL: min_le_v8i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003242; SSE41: # BB#0:
3243; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
3244; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
3245; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
3246; SSE41-NEXT: pminud {{.*}}(%rip), %xmm1
3247; SSE41-NEXT: retq
3248;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003249; SSE42-LABEL: min_le_v8i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003250; SSE42: # BB#0:
3251; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7]
3252; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
3253; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0
3254; SSE42-NEXT: pminud {{.*}}(%rip), %xmm1
3255; SSE42-NEXT: retq
3256;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003257; AVX1-LABEL: min_le_v8i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003258; AVX1: # BB#0:
3259; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295]
3260; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
3261; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
3262; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm1, %xmm1
3263; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3264; AVX1-NEXT: retq
3265;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003266; AVX2-LABEL: min_le_v8i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003267; AVX2: # BB#0:
3268; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
3269; AVX2-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0
3270; AVX2-NEXT: retq
3271;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003272; AVX512-LABEL: min_le_v8i32c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003273; AVX512: # BB#0:
3274; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7]
3275; AVX512-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0
3276; AVX512-NEXT: retq
; First operand: writing -7 into lane 0 leaves the literal unchanged, since
; lane 0 already holds -7.
3277 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
; Second operand: lane 0 is overwritten, so -1 becomes 1.
3278 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 1, i32 0
; Lane-wise unsigned "less than or equal" of the first operand against the second.
3279 %3 = icmp ule <8 x i32> %1, %2
; Keep the %1 lane where the compare holds, otherwise the %2 lane (unsigned min).
3280 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
3281 ret <8 x i32> %4
3282}
3283
; Unsigned minimum (icmp ule + select) of two constant <8 x i16> operands.
; The function takes no arguments; both operands are built from literal
; vectors in the body below.
; Expected lowering per run line: the run with only SSE2 enabled forms the
; compare mask with psubusw followed by pcmpeqw against a zeroed register and
; then blends with pand / pandn / por; the SSE4.1, SSE4.2 and AVX runs expect
; a single pminuw / vpminuw against a constant-pool operand.
; The xmm2 literal in the SSE2 checks starts with 1, which reflects the lane-0
; replacement performed on the second operand.
; NOTE(review): the insertelement wrappers presumably exist so the operands are
; not spelled as plain literals; confirm the intent against the introducing commit.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003284define <8 x i16> @min_le_v8i16c() {
3285; SSE2-LABEL: min_le_v8i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003286; SSE2: # BB#0:
3287; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65529,65531,65533,65535,1,3,5,7]
3288; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,65533,65531,65529,7,5,3,1]
3289; SSE2-NEXT: movdqa %xmm1, %xmm3
3290; SSE2-NEXT: psubusw %xmm2, %xmm3
3291; SSE2-NEXT: pxor %xmm0, %xmm0
3292; SSE2-NEXT: pcmpeqw %xmm3, %xmm0
3293; SSE2-NEXT: pand %xmm0, %xmm1
3294; SSE2-NEXT: pandn %xmm2, %xmm0
3295; SSE2-NEXT: por %xmm1, %xmm0
3296; SSE2-NEXT: retq
3297;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003298; SSE41-LABEL: min_le_v8i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003299; SSE41: # BB#0:
3300; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
3301; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
3302; SSE41-NEXT: retq
3303;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003304; SSE42-LABEL: min_le_v8i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003305; SSE42: # BB#0:
3306; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
3307; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0
3308; SSE42-NEXT: retq
3309;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003310; AVX-LABEL: min_le_v8i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003311; AVX: # BB#0:
3312; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7]
3313; AVX-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
3314; AVX-NEXT: retq
; First operand: writing -7 into lane 0 leaves the literal unchanged, since
; lane 0 already holds -7. The lane index is written with type i16 here.
3315 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i16 0
; Second operand: lane 0 is overwritten, so -1 becomes 1.
3316 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i16 0
; Lane-wise unsigned "less than or equal" of the first operand against the second.
3317 %3 = icmp ule <8 x i16> %1, %2
; Keep the %1 lane where the compare holds, otherwise the %2 lane (unsigned min).
3318 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
3319 ret <8 x i16> %4
3320}
3321
; Unsigned minimum (icmp ule + select) of two constant <16 x i16> operands.
; The function takes no arguments; both operands are built from literal
; vectors in the body below.
; Expected lowering per run line:
;   - only SSE2 enabled: the 256-bit value is handled as two 128-bit halves
;     (results in xmm0 and xmm1); each half builds its mask with psubusw and
;     pcmpeqw against a zeroed register, then blends with pand / pandn / por;
;   - SSE4.1 and SSE4.2: one pminuw per 128-bit half;
;   - AVX1: two 128-bit vpminuw results joined with vinsertf128;
;   - AVX2 and AVX512: a single 256-bit vpminuw.
; The xmm4 literal in the SSE2 checks starts with 1, which reflects the lane-0
; replacement performed on the second operand.
; NOTE(review): the insertelement wrappers presumably exist so the operands are
; not spelled as plain literals; confirm the intent against the introducing commit.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003322define <16 x i16> @min_le_v16i16c() {
3323; SSE2-LABEL: min_le_v16i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003324; SSE2: # BB#0:
3325; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65529,65530,65531,65532,65533,65534,65535,0]
3326; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,2,3,4,5,6,7,8]
3327; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,65534,65533,65532,65531,65530,65529,0]
3328; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0]
3329; SSE2-NEXT: movdqa %xmm3, %xmm1
3330; SSE2-NEXT: psubusw %xmm5, %xmm1
3331; SSE2-NEXT: pxor %xmm6, %xmm6
3332; SSE2-NEXT: pcmpeqw %xmm6, %xmm1
3333; SSE2-NEXT: movdqa %xmm2, %xmm0
3334; SSE2-NEXT: psubusw %xmm4, %xmm0
3335; SSE2-NEXT: pcmpeqw %xmm6, %xmm0
3336; SSE2-NEXT: pand %xmm0, %xmm2
3337; SSE2-NEXT: pandn %xmm4, %xmm0
3338; SSE2-NEXT: por %xmm2, %xmm0
3339; SSE2-NEXT: pand %xmm1, %xmm3
3340; SSE2-NEXT: pandn %xmm5, %xmm1
3341; SSE2-NEXT: por %xmm3, %xmm1
3342; SSE2-NEXT: retq
3343;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003344; SSE41-LABEL: min_le_v16i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003345; SSE41: # BB#0:
3346; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
3347; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
3348; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
3349; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm1
3350; SSE41-NEXT: retq
3351;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003352; SSE42-LABEL: min_le_v16i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003353; SSE42: # BB#0:
3354; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
3355; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
3356; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0
3357; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm1
3358; SSE42-NEXT: retq
3359;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003360; AVX1-LABEL: min_le_v16i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003361; AVX1: # BB#0:
3362; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0]
3363; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
3364; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
3365; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm1, %xmm1
3366; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3367; AVX1-NEXT: retq
3368;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003369; AVX2-LABEL: min_le_v16i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003370; AVX2: # BB#0:
3371; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
3372; AVX2-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
3373; AVX2-NEXT: retq
3374;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003375; AVX512-LABEL: min_le_v16i16c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003376; AVX512: # BB#0:
3377; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8]
3378; AVX512-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
3379; AVX512-NEXT: retq
; First operand: writing -7 into lane 0 leaves the literal unchanged, since
; lane 0 already holds -7. The lane index is written with type i16 here.
3380 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i16 0
; Second operand: lane 0 is overwritten, so -1 becomes 1.
3381 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i16 0
; Lane-wise unsigned "less than or equal" of the first operand against the second.
3382 %3 = icmp ule <16 x i16> %1, %2
; Keep the %1 lane where the compare holds, otherwise the %2 lane (unsigned min).
3383 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
3384 ret <16 x i16> %4
3385}
3386
; Unsigned minimum (icmp ule + select) of two constant <16 x i8> operands.
; The function takes no arguments; both operands are built from literal
; vectors in the body below.
; Expected lowering: the checks use the shared SSE prefix, so every SSE run
; line (including the one with only SSE2 enabled) expects a single pminub; the
; shared AVX prefix expects a single vpminub.
; NOTE(review): the insertelement wrappers presumably exist so the operands are
; not spelled as plain literals; confirm the intent against the introducing commit.
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003387define <16 x i8> @min_le_v16i8c() {
3388; SSE-LABEL: min_le_v16i8c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003389; SSE: # BB#0:
3390; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
3391; SSE-NEXT: pminub {{.*}}(%rip), %xmm0
3392; SSE-NEXT: retq
3393;
Simon Pilgrim9f4374d2015-08-18 09:02:51 +00003394; AVX-LABEL: min_le_v16i8c:
Simon Pilgrim19ffd572015-08-18 08:52:43 +00003395; AVX: # BB#0:
3396; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8]
3397; AVX-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm0
3398; AVX-NEXT: retq
; First operand: writing -7 into lane 0 leaves the literal unchanged, since
; lane 0 already holds -7. The lane index is written with type i8 here.
3399 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i8 0
; Second operand: lane 0 is overwritten, so -1 becomes 1.
3400 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i8 0
; Lane-wise unsigned "less than or equal" of the first operand against the second.
3401 %3 = icmp ule <16 x i8> %1, %2
; Keep the %1 lane where the compare holds, otherwise the %2 lane (unsigned min).
3402 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
3403 ret <16 x i8> %4
3404}