blob: 8265a5513ce8ed0e59ae9b4ca106edc2341ea77f [file] [log] [blame]
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ANY,SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ANY,SSE,SSE41
4
5; There are at least 3 potential patterns corresponding to an unsigned saturated add: min, cmp with sum, cmp with not.
6; Test each of those patterns with i8/i16/i32/i64.
7; Test each of those with a constant operand and a variable operand.
8; Test each of those with a 128-bit vector type.
9
; i8, constant addend 42, "min" form: clamp %x to at most -43 (the bitwise-not
; of 42) with icmp ult + select, then add 42.
; The checked asm performs the clamp with a compare and branch before the addb.
10define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
11; ANY-LABEL: unsigned_sat_constant_i8_using_min:
12; ANY: # %bb.0:
13; ANY-NEXT: cmpb $-43, %dil
14; ANY-NEXT: jb .LBB0_2
15; ANY-NEXT: # %bb.1:
16; ANY-NEXT: movb $-43, %dil
17; ANY-NEXT: .LBB0_2:
18; ANY-NEXT: addb $42, %dil
19; ANY-NEXT: movl %edi, %eax
20; ANY-NEXT: retq
21 %c = icmp ult i8 %x, -43
22 %s = select i1 %c, i8 %x, i8 -43
23 %r = add i8 %s, 42
24 ret i8 %r
25}
26
; i8, constant addend 42, "cmp with sum" form: %a = %x + 42, and the result is
; -1 when %x is unsigned-greater than %a, otherwise %a.
; The checked asm has no separate compare - the jb follows the addb directly.
27define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
28; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
29; ANY: # %bb.0:
30; ANY-NEXT: addb $42, %dil
31; ANY-NEXT: movb $-1, %al
32; ANY-NEXT: jb .LBB1_2
33; ANY-NEXT: # %bb.1:
34; ANY-NEXT: movl %edi, %eax
35; ANY-NEXT: .LBB1_2:
36; ANY-NEXT: retq
37 %a = add i8 %x, 42
38 %c = icmp ugt i8 %x, %a
39 %r = select i1 %c, i8 -1, i8 %a
40 ret i8 %r
41}
42
; i8, constant addend 42, "cmp with not" form: %a = %x + 42, and the result is
; -1 when %x is unsigned-greater than -43 (the bitwise-not of 42), otherwise %a.
; The checked asm compares against -43 and only executes the addb on the
; non-saturating path.
43define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
44; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
45; ANY: # %bb.0:
46; ANY-NEXT: cmpb $-43, %dil
47; ANY-NEXT: movb $-1, %al
48; ANY-NEXT: ja .LBB2_2
49; ANY-NEXT: # %bb.1:
50; ANY-NEXT: addb $42, %dil
51; ANY-NEXT: movl %edi, %eax
52; ANY-NEXT: .LBB2_2:
53; ANY-NEXT: retq
54 %a = add i8 %x, 42
55 %c = icmp ugt i8 %x, -43
56 %r = select i1 %c, i8 -1, i8 %a
57 ret i8 %r
58}
59
; i16, constant addend 42, "min" form: clamp %x to at most -43 with
; icmp ult + select, then add 42.
; The checked asm zero-extends %x, compares against 65493 (0xFFD5, i.e. i16 -43)
; and selects with cmovbl before the add.
60define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
61; ANY-LABEL: unsigned_sat_constant_i16_using_min:
62; ANY: # %bb.0:
63; ANY-NEXT: movzwl %di, %eax
64; ANY-NEXT: cmpl $65493, %eax # imm = 0xFFD5
65; ANY-NEXT: movl $65493, %eax # imm = 0xFFD5
66; ANY-NEXT: cmovbl %edi, %eax
67; ANY-NEXT: addl $42, %eax
68; ANY-NEXT: # kill: def $ax killed $ax killed $eax
69; ANY-NEXT: retq
70 %c = icmp ult i16 %x, -43
71 %s = select i1 %c, i16 %x, i16 -43
72 %r = add i16 %s, 42
73 ret i16 %r
74}
75
; i16, constant addend 42, "cmp with sum" form: %a = %x + 42, and the result is
; -1 when %x is unsigned-greater than %a, otherwise %a.
; The checked asm is an addw followed by cmovael over a preloaded 65535 (0xFFFF).
76define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
77; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
78; ANY: # %bb.0:
79; ANY-NEXT: addw $42, %di
80; ANY-NEXT: movl $65535, %eax # imm = 0xFFFF
81; ANY-NEXT: cmovael %edi, %eax
82; ANY-NEXT: # kill: def $ax killed $ax killed $eax
83; ANY-NEXT: retq
84 %a = add i16 %x, 42
85 %c = icmp ugt i16 %x, %a
86 %r = select i1 %c, i16 -1, i16 %a
87 ret i16 %r
88}
89
; i16, constant addend 42, "cmp with not" form: %a = %x + 42, and the result is
; -1 when %x is unsigned-greater than -43, otherwise %a.
; The checked asm forms the sum with leal, compares the zero-extended %x against
; 65493 (0xFFD5) and selects with cmovbel.
90define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
91; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
92; ANY: # %bb.0:
93; ANY-NEXT: # kill: def $edi killed $edi def $rdi
94; ANY-NEXT: leal 42(%rdi), %ecx
95; ANY-NEXT: movzwl %di, %eax
96; ANY-NEXT: cmpl $65493, %eax # imm = 0xFFD5
97; ANY-NEXT: movl $65535, %eax # imm = 0xFFFF
98; ANY-NEXT: cmovbel %ecx, %eax
99; ANY-NEXT: # kill: def $ax killed $ax killed $eax
100; ANY-NEXT: retq
101 %a = add i16 %x, 42
102 %c = icmp ugt i16 %x, -43
103 %r = select i1 %c, i16 -1, i16 %a
104 ret i16 %r
105}
106
; i32, constant addend 42, "min" form: clamp %x to at most -43 with
; icmp ult + select, then add 42.
; The checked asm is cmpl / cmovbl for the clamp followed by addl.
107define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
108; ANY-LABEL: unsigned_sat_constant_i32_using_min:
109; ANY: # %bb.0:
110; ANY-NEXT: cmpl $-43, %edi
111; ANY-NEXT: movl $-43, %eax
112; ANY-NEXT: cmovbl %edi, %eax
113; ANY-NEXT: addl $42, %eax
114; ANY-NEXT: retq
115 %c = icmp ult i32 %x, -43
116 %s = select i1 %c, i32 %x, i32 -43
117 %r = add i32 %s, 42
118 ret i32 %r
119}
120
; i32, constant addend 42, "cmp with sum" form: %a = %x + 42, and the result is
; -1 when %x is unsigned-greater than %a, otherwise %a.
; The checked asm is an addl followed by cmovael over a preloaded -1.
121define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
122; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
123; ANY: # %bb.0:
124; ANY-NEXT: addl $42, %edi
125; ANY-NEXT: movl $-1, %eax
126; ANY-NEXT: cmovael %edi, %eax
127; ANY-NEXT: retq
128 %a = add i32 %x, 42
129 %c = icmp ugt i32 %x, %a
130 %r = select i1 %c, i32 -1, i32 %a
131 ret i32 %r
132}
133
; i32, constant addend 42, "cmp with not" form: %a = %x + 42, and the result is
; -1 when %x is unsigned-greater than -43, otherwise %a.
; The checked asm forms the sum with leal, compares %x against -43 and selects
; with cmovbel.
134define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
135; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
136; ANY: # %bb.0:
137; ANY-NEXT: # kill: def $edi killed $edi def $rdi
138; ANY-NEXT: leal 42(%rdi), %ecx
139; ANY-NEXT: cmpl $-43, %edi
140; ANY-NEXT: movl $-1, %eax
141; ANY-NEXT: cmovbel %ecx, %eax
142; ANY-NEXT: retq
143 %a = add i32 %x, 42
144 %c = icmp ugt i32 %x, -43
145 %r = select i1 %c, i32 -1, i32 %a
146 ret i32 %r
147}
148
; i64, constant addend 42, "min" form: clamp %x to at most -43 with
; icmp ult + select, then add 42.
; The checked asm is cmpq / cmovbq for the clamp followed by addq.
149define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
150; ANY-LABEL: unsigned_sat_constant_i64_using_min:
151; ANY: # %bb.0:
152; ANY-NEXT: cmpq $-43, %rdi
153; ANY-NEXT: movq $-43, %rax
154; ANY-NEXT: cmovbq %rdi, %rax
155; ANY-NEXT: addq $42, %rax
156; ANY-NEXT: retq
157 %c = icmp ult i64 %x, -43
158 %s = select i1 %c, i64 %x, i64 -43
159 %r = add i64 %s, 42
160 ret i64 %r
161}
162
; i64, constant addend 42, "cmp with sum" form: %a = %x + 42, and the result is
; -1 when %x is unsigned-greater than %a, otherwise %a.
; The checked asm is an addq followed by cmovaeq over a preloaded -1.
163define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
164; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_sum:
165; ANY: # %bb.0:
166; ANY-NEXT: addq $42, %rdi
167; ANY-NEXT: movq $-1, %rax
168; ANY-NEXT: cmovaeq %rdi, %rax
169; ANY-NEXT: retq
170 %a = add i64 %x, 42
171 %c = icmp ugt i64 %x, %a
172 %r = select i1 %c, i64 -1, i64 %a
173 ret i64 %r
174}
175
; i64, constant addend 42, "cmp with not" form: %a = %x + 42, and the result is
; -1 when %x is unsigned-greater than -43, otherwise %a.
; The checked asm compares %x against -43, forms the sum with leaq and
; overwrites it with -1 via cmovaq.
176define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
177; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
178; ANY: # %bb.0:
179; ANY-NEXT: cmpq $-43, %rdi
180; ANY-NEXT: leaq 42(%rdi), %rax
181; ANY-NEXT: movq $-1, %rcx
182; ANY-NEXT: cmovaq %rcx, %rax
183; ANY-NEXT: retq
184 %a = add i64 %x, 42
185 %c = icmp ugt i64 %x, -43
186 %r = select i1 %c, i64 -1, i64 %a
187 ret i64 %r
188}
189
; i8, variable addend %y, "min" form: %noty = ~%y (xor with -1), clamp %x to at
; most %noty with icmp ult + select, then add %y.
; The checked asm performs the clamp with a compare and branch before the addb.
190define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
191; ANY-LABEL: unsigned_sat_variable_i8_using_min:
192; ANY: # %bb.0:
193; ANY-NEXT: movl %esi, %eax
194; ANY-NEXT: notb %al
195; ANY-NEXT: cmpb %al, %dil
196; ANY-NEXT: jb .LBB12_2
197; ANY-NEXT: # %bb.1:
198; ANY-NEXT: movl %eax, %edi
199; ANY-NEXT: .LBB12_2:
200; ANY-NEXT: addb %sil, %dil
201; ANY-NEXT: movl %edi, %eax
202; ANY-NEXT: retq
203 %noty = xor i8 %y, -1
204 %c = icmp ult i8 %x, %noty
205 %s = select i1 %c, i8 %x, i8 %noty
206 %r = add i8 %s, %y
207 ret i8 %r
208}
209
; i8, variable addend %y, "cmp with sum" form: %a = %x + %y, and the result is
; -1 when %x is unsigned-greater than %a, otherwise %a.
; The checked asm has no separate compare - the jb follows the addb directly.
210define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
211; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
212; ANY: # %bb.0:
213; ANY-NEXT: addb %sil, %dil
214; ANY-NEXT: movb $-1, %al
215; ANY-NEXT: jb .LBB13_2
216; ANY-NEXT: # %bb.1:
217; ANY-NEXT: movl %edi, %eax
218; ANY-NEXT: .LBB13_2:
219; ANY-NEXT: retq
220 %a = add i8 %x, %y
221 %c = icmp ugt i8 %x, %a
222 %r = select i1 %c, i8 -1, i8 %a
223 ret i8 %r
224}
225
; i8, variable addend %y, "cmp with not" form: %noty = ~%y, %a = %x + %y, and
; the result is -1 when %x is unsigned-greater than %noty, otherwise %a.
; The checked asm compares %x with the inverted %y and only executes the addb on
; the non-saturating path.
226define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
227; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
228; ANY: # %bb.0:
229; ANY-NEXT: movl %esi, %eax
230; ANY-NEXT: notb %al
231; ANY-NEXT: cmpb %al, %dil
232; ANY-NEXT: movb $-1, %al
233; ANY-NEXT: ja .LBB14_2
234; ANY-NEXT: # %bb.1:
235; ANY-NEXT: addb %sil, %dil
236; ANY-NEXT: movl %edi, %eax
237; ANY-NEXT: .LBB14_2:
238; ANY-NEXT: retq
239 %noty = xor i8 %y, -1
240 %a = add i8 %x, %y
241 %c = icmp ugt i8 %x, %noty
242 %r = select i1 %c, i8 -1, i8 %a
243 ret i8 %r
244}
245
; i16, variable addend %y, "min" form: %noty = ~%y, clamp %x to at most %noty
; with icmp ult + select, then add %y.
; The checked asm is notl / cmpw / cmovbl for the clamp, with the final add done
; by leal.
246define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
247; ANY-LABEL: unsigned_sat_variable_i16_using_min:
248; ANY: # %bb.0:
249; ANY-NEXT: # kill: def $esi killed $esi def $rsi
250; ANY-NEXT: movl %esi, %eax
251; ANY-NEXT: notl %eax
252; ANY-NEXT: cmpw %ax, %di
253; ANY-NEXT: cmovbl %edi, %eax
254; ANY-NEXT: leal (%rax,%rsi), %eax
255; ANY-NEXT: # kill: def $ax killed $ax killed $eax
256; ANY-NEXT: retq
257 %noty = xor i16 %y, -1
258 %c = icmp ult i16 %x, %noty
259 %s = select i1 %c, i16 %x, i16 %noty
260 %r = add i16 %s, %y
261 ret i16 %r
262}
263
; i16, variable addend %y, "cmp with sum" form: %a = %x + %y, and the result is
; -1 when %x is unsigned-greater than %a, otherwise %a.
; The checked asm is an addw followed by cmovael over a preloaded 65535 (0xFFFF).
264define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
265; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
266; ANY: # %bb.0:
267; ANY-NEXT: addw %si, %di
268; ANY-NEXT: movl $65535, %eax # imm = 0xFFFF
269; ANY-NEXT: cmovael %edi, %eax
270; ANY-NEXT: # kill: def $ax killed $ax killed $eax
271; ANY-NEXT: retq
272 %a = add i16 %x, %y
273 %c = icmp ugt i16 %x, %a
274 %r = select i1 %c, i16 -1, i16 %a
275 ret i16 %r
276}
277
; i16, variable addend %y, "cmp with not" form: %noty = ~%y, %a = %x + %y, and
; the result is -1 when %x is unsigned-greater than %noty, otherwise %a.
; The checked asm forms the sum with leal, inverts %y with notl, compares with
; cmpw and selects with cmovbel.
278define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
279; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
280; ANY: # %bb.0:
281; ANY-NEXT: # kill: def $esi killed $esi def $rsi
282; ANY-NEXT: # kill: def $edi killed $edi def $rdi
283; ANY-NEXT: leal (%rdi,%rsi), %ecx
284; ANY-NEXT: notl %esi
285; ANY-NEXT: cmpw %si, %di
286; ANY-NEXT: movl $65535, %eax # imm = 0xFFFF
287; ANY-NEXT: cmovbel %ecx, %eax
288; ANY-NEXT: # kill: def $ax killed $ax killed $eax
289; ANY-NEXT: retq
290 %noty = xor i16 %y, -1
291 %a = add i16 %x, %y
292 %c = icmp ugt i16 %x, %noty
293 %r = select i1 %c, i16 -1, i16 %a
294 ret i16 %r
295}
296
; i32, variable addend %y, "min" form: %noty = ~%y, clamp %x to at most %noty
; with icmp ult + select, then add %y.
; The checked asm is notl / cmpl / cmovbl for the clamp, with the final add done
; by leal.
297define i32 @unsigned_sat_variable_i32_using_min(i32 %x, i32 %y) {
298; ANY-LABEL: unsigned_sat_variable_i32_using_min:
299; ANY: # %bb.0:
300; ANY-NEXT: # kill: def $esi killed $esi def $rsi
301; ANY-NEXT: movl %esi, %eax
302; ANY-NEXT: notl %eax
303; ANY-NEXT: cmpl %eax, %edi
304; ANY-NEXT: cmovbl %edi, %eax
305; ANY-NEXT: leal (%rax,%rsi), %eax
306; ANY-NEXT: retq
307 %noty = xor i32 %y, -1
308 %c = icmp ult i32 %x, %noty
309 %s = select i1 %c, i32 %x, i32 %noty
310 %r = add i32 %s, %y
311 ret i32 %r
312}
313
; i32, variable addend %y, "cmp with sum" form: %a = %x + %y, and the result is
; -1 when %x is unsigned-greater than %a, otherwise %a.
; The checked asm is an addl followed by cmovael over a preloaded -1.
314define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
315; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
316; ANY: # %bb.0:
317; ANY-NEXT: addl %esi, %edi
318; ANY-NEXT: movl $-1, %eax
319; ANY-NEXT: cmovael %edi, %eax
320; ANY-NEXT: retq
321 %a = add i32 %x, %y
322 %c = icmp ugt i32 %x, %a
323 %r = select i1 %c, i32 -1, i32 %a
324 ret i32 %r
325}
326
; i32, variable addend %y, "cmp with not" form: %noty = ~%y, %a = %x + %y, and
; the result is -1 when %x is unsigned-greater than %noty, otherwise %a.
; The checked asm forms the sum with leal, inverts %y with notl, compares with
; cmpl and selects with cmovbel.
327define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
328; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
329; ANY: # %bb.0:
330; ANY-NEXT: # kill: def $esi killed $esi def $rsi
331; ANY-NEXT: # kill: def $edi killed $edi def $rdi
332; ANY-NEXT: leal (%rdi,%rsi), %ecx
333; ANY-NEXT: notl %esi
334; ANY-NEXT: cmpl %esi, %edi
335; ANY-NEXT: movl $-1, %eax
336; ANY-NEXT: cmovbel %ecx, %eax
337; ANY-NEXT: retq
338 %noty = xor i32 %y, -1
339 %a = add i32 %x, %y
340 %c = icmp ugt i32 %x, %noty
341 %r = select i1 %c, i32 -1, i32 %a
342 ret i32 %r
343}
344
; i64, variable addend %y, "min" form: %noty = ~%y, clamp %x to at most %noty
; with icmp ult + select, then add %y.
; The checked asm is notq / cmpq / cmovbq for the clamp, with the final add done
; by leaq.
345define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) {
346; ANY-LABEL: unsigned_sat_variable_i64_using_min:
347; ANY: # %bb.0:
348; ANY-NEXT: movq %rsi, %rax
349; ANY-NEXT: notq %rax
350; ANY-NEXT: cmpq %rax, %rdi
351; ANY-NEXT: cmovbq %rdi, %rax
352; ANY-NEXT: leaq (%rax,%rsi), %rax
353; ANY-NEXT: retq
354 %noty = xor i64 %y, -1
355 %c = icmp ult i64 %x, %noty
356 %s = select i1 %c, i64 %x, i64 %noty
357 %r = add i64 %s, %y
358 ret i64 %r
359}
360
; i64, variable addend %y, "cmp with sum" form: %a = %x + %y, and the result is
; -1 when %x is unsigned-greater than %a, otherwise %a.
; The checked asm is an addq followed by cmovaeq over a preloaded -1.
361define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
362; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_sum:
363; ANY: # %bb.0:
364; ANY-NEXT: addq %rsi, %rdi
365; ANY-NEXT: movq $-1, %rax
366; ANY-NEXT: cmovaeq %rdi, %rax
367; ANY-NEXT: retq
368 %a = add i64 %x, %y
369 %c = icmp ugt i64 %x, %a
370 %r = select i1 %c, i64 -1, i64 %a
371 ret i64 %r
372}
373
; i64, variable addend %y, "cmp with not" form: %noty = ~%y, %a = %x + %y, and
; the result is -1 when %x is unsigned-greater than %noty, otherwise %a.
; The checked asm forms the sum with leaq, inverts %y with notq, compares with
; cmpq and selects with cmovbeq.
374define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
375; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
376; ANY: # %bb.0:
377; ANY-NEXT: leaq (%rdi,%rsi), %rcx
378; ANY-NEXT: notq %rsi
379; ANY-NEXT: cmpq %rsi, %rdi
380; ANY-NEXT: movq $-1, %rax
381; ANY-NEXT: cmovbeq %rcx, %rax
382; ANY-NEXT: retq
383 %noty = xor i64 %y, -1
384 %a = add i64 %x, %y
385 %c = icmp ugt i64 %x, %noty
386 %r = select i1 %c, i64 -1, i64 %a
387 ret i64 %r
388}
389
; <16 x i8>, splat constant addend 42, "min" form: clamp each lane of %x to at
; most -43 with icmp ult + select, then add 42.
; The checked asm is pminub followed by paddb, both with memory operands
; addressed relative to rip.
390define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
391; ANY-LABEL: unsigned_sat_constant_v16i8_using_min:
392; ANY: # %bb.0:
393; ANY-NEXT: pminub {{.*}}(%rip), %xmm0
394; ANY-NEXT: paddb {{.*}}(%rip), %xmm0
395; ANY-NEXT: retq
396 %c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
397 %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
398 %r = add <16 x i8> %s, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
399 ret <16 x i8> %r
400}
401
; <16 x i8>, splat constant addend 42, "cmp with sum" form: %a = %x + 42, and
; each lane is -1 where %x is unsigned-greater than %a, otherwise the %a lane.
; The checked asm is a single paddusb.
402define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
403; ANY-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
404; ANY: # %bb.0:
405; ANY-NEXT: paddusb {{.*}}(%rip), %xmm0
406; ANY-NEXT: retq
407 %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
408 %c = icmp ugt <16 x i8> %x, %a
409 %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
410 ret <16 x i8> %r
411}
412
; <16 x i8>, splat constant addend 42, "cmp with not" form: %a = %x + 42, and
; each lane is -1 where %x is unsigned-greater than -43, otherwise the %a lane.
; The checked asm is a single paddusb.
413define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
414; ANY-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
415; ANY: # %bb.0:
416; ANY-NEXT: paddusb {{.*}}(%rip), %xmm0
417; ANY-NEXT: retq
418 %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
419 %c = icmp ugt <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
420 %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
421 ret <16 x i8> %r
422}
423
; <8 x i16>, splat constant addend 42, "min" form: clamp each lane of %x to at
; most -43 with icmp ult + select, then add 42.
; Checked asm differs per run - the sse2 run xors with 32768 around a signed
; pminsw, while the sse4.1 run uses pminuw directly; both finish with paddw.
424define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
425; SSE2-LABEL: unsigned_sat_constant_v8i16_using_min:
426; SSE2: # %bb.0:
427; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
428; SSE2-NEXT: pxor %xmm1, %xmm0
429; SSE2-NEXT: pminsw {{.*}}(%rip), %xmm0
430; SSE2-NEXT: pxor %xmm1, %xmm0
431; SSE2-NEXT: paddw {{.*}}(%rip), %xmm0
432; SSE2-NEXT: retq
433;
434; SSE41-LABEL: unsigned_sat_constant_v8i16_using_min:
435; SSE41: # %bb.0:
436; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
437; SSE41-NEXT: paddw {{.*}}(%rip), %xmm0
438; SSE41-NEXT: retq
439 %c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
440 %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
441 %r = add <8 x i16> %s, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
442 ret <8 x i16> %r
443}
444
; <8 x i16>, splat constant addend 42, "cmp with sum" form: %a = %x + 42, and
; each lane is -1 where %x is unsigned-greater than %a, otherwise the %a lane.
; The checked asm is a single paddusw.
445define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
446; ANY-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
447; ANY: # %bb.0:
448; ANY-NEXT: paddusw {{.*}}(%rip), %xmm0
449; ANY-NEXT: retq
450 %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
451 %c = icmp ugt <8 x i16> %x, %a
452 %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
453 ret <8 x i16> %r
454}
455
; <8 x i16>, splat constant addend 42, "cmp with not" form: %a = %x + 42, and
; each lane is -1 where %x is unsigned-greater than -43, otherwise the %a lane.
; The checked asm is a single paddusw.
456define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
457; ANY-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
458; ANY: # %bb.0:
459; ANY-NEXT: paddusw {{.*}}(%rip), %xmm0
460; ANY-NEXT: retq
461 %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
462 %c = icmp ugt <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
463 %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
464 ret <8 x i16> %r
465}
466
; <4 x i32>, splat constant addend 42, "min" form: clamp each lane of %x to at
; most -43 with icmp ult + select, then add 42.
; Checked asm differs per run - the sse2 run builds the select from a
; sign-bit-flipped pcmpgtd plus pand/pandn/por, while the sse4.1 run uses pminud;
; both finish with paddd.
467define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
468; SSE2-LABEL: unsigned_sat_constant_v4i32_using_min:
469; SSE2: # %bb.0:
470; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
471; SSE2-NEXT: pxor %xmm0, %xmm1
472; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483605,2147483605,2147483605,2147483605]
473; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
474; SSE2-NEXT: pand %xmm2, %xmm0
475; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
476; SSE2-NEXT: por %xmm2, %xmm0
477; SSE2-NEXT: paddd {{.*}}(%rip), %xmm0
478; SSE2-NEXT: retq
479;
480; SSE41-LABEL: unsigned_sat_constant_v4i32_using_min:
481; SSE41: # %bb.0:
482; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
483; SSE41-NEXT: paddd {{.*}}(%rip), %xmm0
484; SSE41-NEXT: retq
485 %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
486 %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -43, i32 -43, i32 -43, i32 -43>
487 %r = add <4 x i32> %s, <i32 42, i32 42, i32 42, i32 42>
488 ret <4 x i32> %r
489}
490
; <4 x i32>, splat constant addend 42, "cmp with sum" form: %a = %x + 42, and
; each lane is -1 where %x is unsigned-greater than %a, otherwise the %a lane.
; Checked asm differs per run - the sse2 run xors both compare operands with
; 2147483648 before pcmpgtd, while the sse4.1 run uses pminud + pcmpeqd and
; inverts the mask; both OR the mask into the sum.
491define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
492; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
493; SSE2: # %bb.0:
494; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42]
495; SSE2-NEXT: paddd %xmm0, %xmm1
496; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
497; SSE2-NEXT: pxor %xmm2, %xmm0
498; SSE2-NEXT: pxor %xmm1, %xmm2
499; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
500; SSE2-NEXT: por %xmm1, %xmm0
501; SSE2-NEXT: retq
502;
503; SSE41-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
504; SSE41: # %bb.0:
505; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [42,42,42,42]
506; SSE41-NEXT: paddd %xmm0, %xmm2
507; SSE41-NEXT: movdqa %xmm0, %xmm1
508; SSE41-NEXT: pminud %xmm2, %xmm1
509; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
510; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
511; SSE41-NEXT: pxor %xmm0, %xmm1
512; SSE41-NEXT: por %xmm2, %xmm1
513; SSE41-NEXT: movdqa %xmm1, %xmm0
514; SSE41-NEXT: retq
515 %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
516 %c = icmp ugt <4 x i32> %x, %a
517 %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
518 ret <4 x i32> %r
519}
520
; <4 x i32>, splat constant addend 42, "cmp with not" form: %a = %x + 42, and
; each lane is -1 where %x is unsigned-greater than -43, otherwise the %a lane.
; Checked asm differs per run - the sse2 run uses pxor + pcmpgtd against
; constant-pool operands, while the sse4.1 run uses pminud against 4294967253
; (i32 -43 as unsigned) + pcmpeqd and inverts the mask; both OR it into the sum.
521define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
522; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
523; SSE2: # %bb.0:
524; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42]
525; SSE2-NEXT: paddd %xmm0, %xmm1
526; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
527; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
528; SSE2-NEXT: por %xmm1, %xmm0
529; SSE2-NEXT: retq
530;
531; SSE41-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
532; SSE41: # %bb.0:
533; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42]
534; SSE41-NEXT: paddd %xmm0, %xmm1
535; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
536; SSE41-NEXT: pminud %xmm0, %xmm2
537; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
538; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
539; SSE41-NEXT: pxor %xmm2, %xmm0
540; SSE41-NEXT: por %xmm1, %xmm0
541; SSE41-NEXT: retq
542 %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
543 %c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
544 %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
545 ret <4 x i32> %r
546}
547
; <2 x i64>, splat constant addend 42, "min" form: clamp each lane of %x to at
; most -43 with icmp ult + select, then add 42.
; In both runs the checked asm builds the 64-bit unsigned compare out of 32-bit
; pcmpgtd / pcmpeqd results combined via pshufd; the select is pand/pandn/por in
; the sse2 run and blendvpd in the sse4.1 run, followed by paddq.
548define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
549; SSE2-LABEL: unsigned_sat_constant_v2i64_using_min:
550; SSE2: # %bb.0:
551; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
552; SSE2-NEXT: pxor %xmm0, %xmm1
553; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
554; SSE2-NEXT: movdqa %xmm2, %xmm3
555; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
556; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
557; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
558; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
559; SSE2-NEXT: pand %xmm4, %xmm1
560; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
561; SSE2-NEXT: por %xmm1, %xmm2
562; SSE2-NEXT: pand %xmm2, %xmm0
563; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
564; SSE2-NEXT: por %xmm2, %xmm0
565; SSE2-NEXT: paddq {{.*}}(%rip), %xmm0
566; SSE2-NEXT: retq
567;
568; SSE41-LABEL: unsigned_sat_constant_v2i64_using_min:
569; SSE41: # %bb.0:
570; SSE41-NEXT: movdqa %xmm0, %xmm1
571; SSE41-NEXT: movapd {{.*#+}} xmm2 = [18446744073709551573,18446744073709551573]
572; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
573; SSE41-NEXT: pxor %xmm1, %xmm0
574; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372034707292117,9223372034707292117]
575; SSE41-NEXT: movdqa %xmm3, %xmm4
576; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
577; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
578; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
579; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
580; SSE41-NEXT: pand %xmm5, %xmm3
581; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
582; SSE41-NEXT: por %xmm3, %xmm0
583; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
584; SSE41-NEXT: paddq {{.*}}(%rip), %xmm2
585; SSE41-NEXT: movdqa %xmm2, %xmm0
586; SSE41-NEXT: retq
587 %c = icmp ult <2 x i64> %x, <i64 -43, i64 -43>
588 %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> <i64 -43, i64 -43>
589 %r = add <2 x i64> %s, <i64 42, i64 42>
590 ret <2 x i64> %r
591}
592
; <2 x i64>, splat constant addend 42, "cmp with sum" form: %a = %x + 42, and
; each lane is -1 where %x is unsigned-greater than %a, otherwise the %a lane.
; The same asm is checked for both runs - paddq for the sum, a 64-bit unsigned
; compare built from 32-bit pcmpgtd / pcmpeqd + pshufd, and the mask OR-ed into
; the sum.
593define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
594; ANY-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
595; ANY: # %bb.0:
596; ANY-NEXT: movdqa {{.*#+}} xmm1 = [42,42]
597; ANY-NEXT: paddq %xmm0, %xmm1
598; ANY-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
599; ANY-NEXT: pxor %xmm2, %xmm0
600; ANY-NEXT: pxor %xmm1, %xmm2
601; ANY-NEXT: movdqa %xmm0, %xmm3
602; ANY-NEXT: pcmpgtd %xmm2, %xmm3
603; ANY-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
604; ANY-NEXT: pcmpeqd %xmm0, %xmm2
605; ANY-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
606; ANY-NEXT: pand %xmm4, %xmm2
607; ANY-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
608; ANY-NEXT: por %xmm1, %xmm0
609; ANY-NEXT: por %xmm2, %xmm0
610; ANY-NEXT: retq
611 %a = add <2 x i64> %x, <i64 42, i64 42>
612 %c = icmp ugt <2 x i64> %x, %a
613 %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
614 ret <2 x i64> %r
615}
616
; <2 x i64>, splat constant addend 42, "cmp with not" form: %a = %x + 42, and
; each lane is -1 where %x is unsigned-greater than -43, otherwise the %a lane.
; The same asm is checked for both runs - paddq for the sum, a 64-bit unsigned
; compare against a constant built from 32-bit pcmpgtd / pcmpeqd + pshufd, and
; the mask OR-ed into the sum.
617define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
618; ANY-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
619; ANY: # %bb.0:
620; ANY-NEXT: movdqa {{.*#+}} xmm1 = [42,42]
621; ANY-NEXT: paddq %xmm0, %xmm1
622; ANY-NEXT: pxor {{.*}}(%rip), %xmm0
623; ANY-NEXT: movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
624; ANY-NEXT: movdqa %xmm0, %xmm3
625; ANY-NEXT: pcmpgtd %xmm2, %xmm3
626; ANY-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
627; ANY-NEXT: pcmpeqd %xmm2, %xmm0
628; ANY-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
629; ANY-NEXT: pand %xmm4, %xmm2
630; ANY-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
631; ANY-NEXT: por %xmm1, %xmm0
632; ANY-NEXT: por %xmm2, %xmm0
633; ANY-NEXT: retq
634 %a = add <2 x i64> %x, <i64 42, i64 42>
635 %c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
636 %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
637 ret <2 x i64> %r
638}
639
; <16 x i8>, variable addend %y, "min" form: %noty = ~%y, clamp each lane of %x
; to at most %noty with icmp ult + select, then add %y.
; The checked asm materialises all-ones with pcmpeqd, inverts %y with pxor, then
; uses pminub and paddb.
640define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8> %y) {
641; ANY-LABEL: unsigned_sat_variable_v16i8_using_min:
642; ANY: # %bb.0:
643; ANY-NEXT: pcmpeqd %xmm2, %xmm2
644; ANY-NEXT: pxor %xmm1, %xmm2
645; ANY-NEXT: pminub %xmm2, %xmm0
646; ANY-NEXT: paddb %xmm1, %xmm0
647; ANY-NEXT: retq
648 %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
649 %c = icmp ult <16 x i8> %x, %noty
650 %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %noty
651 %r = add <16 x i8> %s, %y
652 ret <16 x i8> %r
653}
654
; <16 x i8>, variable addend %y, "cmp with sum" form: %a = %x + %y, and each
; lane is -1 where %x is unsigned-greater than %a, otherwise the %a lane.
; The checked asm is a single paddusb.
655define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
656; ANY-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
657; ANY: # %bb.0:
658; ANY-NEXT: paddusb %xmm1, %xmm0
659; ANY-NEXT: retq
660 %a = add <16 x i8> %x, %y
661 %c = icmp ugt <16 x i8> %x, %a
662 %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
663 ret <16 x i8> %r
664}
665
; <16 x i8>, variable addend %y, "cmp with not" form: %noty = ~%y, %a = %x + %y,
; and each lane is -1 where %x is unsigned-greater than %noty, otherwise %a.
; The checked asm is not a single paddusb here - it computes the sum with paddb,
; derives the compare mask from pminub + pcmpeqb (inverted with pxor) and ORs the
; mask into the sum.
666define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16 x i8> %y) {
667; ANY-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
668; ANY: # %bb.0:
669; ANY-NEXT: pcmpeqd %xmm2, %xmm2
670; ANY-NEXT: movdqa %xmm0, %xmm3
671; ANY-NEXT: paddb %xmm1, %xmm3
672; ANY-NEXT: pxor %xmm2, %xmm1
673; ANY-NEXT: pminub %xmm0, %xmm1
674; ANY-NEXT: pcmpeqb %xmm1, %xmm0
675; ANY-NEXT: pxor %xmm2, %xmm0
676; ANY-NEXT: por %xmm3, %xmm0
677; ANY-NEXT: retq
678 %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
679 %a = add <16 x i8> %x, %y
680 %c = icmp ugt <16 x i8> %x, %noty
681 %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
682 ret <16 x i8> %r
683}
684
; <8 x i16>, variable addend %y, "min" form: %noty = ~%y, clamp each lane of %x
; to at most %noty with icmp ult + select, then add %y.
; Checked asm differs per run - the sse2 run xors with 32768 around a signed
; pminsw, while the sse4.1 run uses pminuw directly; both finish with paddw.
685define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16> %y) {
686; SSE2-LABEL: unsigned_sat_variable_v8i16_using_min:
687; SSE2: # %bb.0:
688; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
689; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32768,32768,32768,32768,32768,32768,32768,32768]
690; SSE2-NEXT: pxor %xmm3, %xmm0
691; SSE2-NEXT: pxor %xmm3, %xmm2
692; SSE2-NEXT: pxor %xmm1, %xmm2
693; SSE2-NEXT: pminsw %xmm2, %xmm0
694; SSE2-NEXT: pxor %xmm3, %xmm0
695; SSE2-NEXT: paddw %xmm1, %xmm0
696; SSE2-NEXT: retq
697;
698; SSE41-LABEL: unsigned_sat_variable_v8i16_using_min:
699; SSE41: # %bb.0:
700; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
701; SSE41-NEXT: pxor %xmm1, %xmm2
702; SSE41-NEXT: pminuw %xmm2, %xmm0
703; SSE41-NEXT: paddw %xmm1, %xmm0
704; SSE41-NEXT: retq
705 %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
706 %c = icmp ult <8 x i16> %x, %noty
707 %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %noty
708 %r = add <8 x i16> %s, %y
709 ret <8 x i16> %r
710}
711
; <8 x i16>, variable addend %y, "cmp with sum" form: %a = %x + %y, and each
; lane is -1 where %x is unsigned-greater than %a, otherwise the %a lane.
; The checked asm is a single paddusw.
712define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
713; ANY-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
714; ANY: # %bb.0:
715; ANY-NEXT: paddusw %xmm1, %xmm0
716; ANY-NEXT: retq
717 %a = add <8 x i16> %x, %y
718 %c = icmp ugt <8 x i16> %x, %a
719 %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
720 ret <8 x i16> %r
721}
722
; <8 x i16>, variable addend %y, "cmp with not" form: %noty = ~%y, %a = %x + %y,
; and each lane is -1 where %x is unsigned-greater than %noty, otherwise %a.
; Checked asm differs per run - the sse2 run xors the compare operands with 32768
; before a signed pcmpgtw, while the sse4.1 run uses pminuw + pcmpeqw and inverts
; the mask; both OR the mask into the paddw sum.
723define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8 x i16> %y) {
724; SSE2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
725; SSE2: # %bb.0:
726; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
727; SSE2-NEXT: movdqa %xmm0, %xmm3
728; SSE2-NEXT: paddw %xmm1, %xmm3
729; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
730; SSE2-NEXT: pxor %xmm4, %xmm0
731; SSE2-NEXT: pxor %xmm4, %xmm2
732; SSE2-NEXT: pxor %xmm1, %xmm2
733; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
734; SSE2-NEXT: por %xmm3, %xmm0
735; SSE2-NEXT: retq
736;
737; SSE41-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
738; SSE41: # %bb.0:
739; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
740; SSE41-NEXT: movdqa %xmm0, %xmm3
741; SSE41-NEXT: paddw %xmm1, %xmm3
742; SSE41-NEXT: pxor %xmm2, %xmm1
743; SSE41-NEXT: pminuw %xmm0, %xmm1
744; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
745; SSE41-NEXT: pxor %xmm2, %xmm0
746; SSE41-NEXT: por %xmm3, %xmm0
747; SSE41-NEXT: retq
748 %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
749 %a = add <8 x i16> %x, %y
750 %c = icmp ugt <8 x i16> %x, %noty
751 %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
752 ret <8 x i16> %r
753}
754
; <4 x i32>, variable addend %y, "min" form: %noty = ~%y, clamp each lane of %x
; to at most %noty with icmp ult + select, then add %y.
; Checked asm differs per run - the sse2 run builds the select from a
; sign-bit-flipped pcmpgtd plus pand/pandn/por, while the sse4.1 run uses pminud;
; both finish with paddd.
755define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32> %y) {
756; SSE2-LABEL: unsigned_sat_variable_v4i32_using_min:
757; SSE2: # %bb.0:
758; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
759; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
760; SSE2-NEXT: movdqa %xmm0, %xmm4
761; SSE2-NEXT: pxor %xmm3, %xmm4
762; SSE2-NEXT: pxor %xmm2, %xmm3
763; SSE2-NEXT: pxor %xmm1, %xmm3
764; SSE2-NEXT: pcmpgtd %xmm4, %xmm3
765; SSE2-NEXT: pand %xmm3, %xmm0
766; SSE2-NEXT: pxor %xmm2, %xmm3
767; SSE2-NEXT: movdqa %xmm1, %xmm2
768; SSE2-NEXT: pandn %xmm3, %xmm2
769; SSE2-NEXT: por %xmm2, %xmm0
770; SSE2-NEXT: paddd %xmm1, %xmm0
771; SSE2-NEXT: retq
772;
773; SSE41-LABEL: unsigned_sat_variable_v4i32_using_min:
774; SSE41: # %bb.0:
775; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
776; SSE41-NEXT: pxor %xmm1, %xmm2
777; SSE41-NEXT: pminud %xmm2, %xmm0
778; SSE41-NEXT: paddd %xmm1, %xmm0
779; SSE41-NEXT: retq
780 %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
781 %c = icmp ult <4 x i32> %x, %noty
782 %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %noty
783 %r = add <4 x i32> %s, %y
784 ret <4 x i32> %r
785}
786
; <4 x i32>, variable addend %y, "cmp with sum" form: %a = %x + %y, and each
; lane is -1 where %x is unsigned-greater than %a, otherwise the %a lane.
; Checked asm differs per run - the sse2 run xors both compare operands with
; 2147483648 before pcmpgtd, while the sse4.1 run uses pminud + pcmpeqd and
; inverts the mask; both OR the mask into the paddd sum.
787define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
788; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
789; SSE2: # %bb.0:
790; SSE2-NEXT: paddd %xmm0, %xmm1
791; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
792; SSE2-NEXT: pxor %xmm2, %xmm0
793; SSE2-NEXT: pxor %xmm1, %xmm2
794; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
795; SSE2-NEXT: por %xmm1, %xmm0
796; SSE2-NEXT: retq
797;
798; SSE41-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
799; SSE41: # %bb.0:
800; SSE41-NEXT: paddd %xmm0, %xmm1
801; SSE41-NEXT: movdqa %xmm0, %xmm2
802; SSE41-NEXT: pminud %xmm1, %xmm2
803; SSE41-NEXT: pcmpeqd %xmm0, %xmm2
804; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
805; SSE41-NEXT: pxor %xmm0, %xmm2
806; SSE41-NEXT: por %xmm1, %xmm2
807; SSE41-NEXT: movdqa %xmm2, %xmm0
808; SSE41-NEXT: retq
809 %a = add <4 x i32> %x, %y
810 %c = icmp ugt <4 x i32> %x, %a
811 %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
812 ret <4 x i32> %r
813}
814
; <4 x i32>, variable addend %y, "cmp with not" form: %noty = ~%y, %a = %x + %y,
; and each lane is -1 where %x is unsigned-greater than %noty, otherwise %a.
; Checked asm differs per run - the sse2 run xors the compare operands with
; 2147483648 before pcmpgtd, while the sse4.1 run uses pminud + pcmpeqd and
; inverts the mask; both OR the mask into the paddd sum.
815define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4 x i32> %y) {
816; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
817; SSE2: # %bb.0:
818; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
819; SSE2-NEXT: movdqa %xmm0, %xmm3
820; SSE2-NEXT: paddd %xmm1, %xmm3
821; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
822; SSE2-NEXT: pxor %xmm4, %xmm0
823; SSE2-NEXT: pxor %xmm4, %xmm2
824; SSE2-NEXT: pxor %xmm1, %xmm2
825; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
826; SSE2-NEXT: por %xmm3, %xmm0
827; SSE2-NEXT: retq
828;
829; SSE41-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
830; SSE41: # %bb.0:
831; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
832; SSE41-NEXT: movdqa %xmm0, %xmm3
833; SSE41-NEXT: paddd %xmm1, %xmm3
834; SSE41-NEXT: pxor %xmm2, %xmm1
835; SSE41-NEXT: pminud %xmm0, %xmm1
836; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
837; SSE41-NEXT: pxor %xmm2, %xmm0
838; SSE41-NEXT: por %xmm3, %xmm0
839; SSE41-NEXT: retq
840 %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
841 %a = add <4 x i32> %x, %y
842 %c = icmp ugt <4 x i32> %x, %noty
843 %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
844 ret <4 x i32> %r
845}
846
; <2 x i64>, variable addend %y, "min" form: %noty = ~%y, clamp each lane of %x
; to at most %noty with icmp ult + select, then add %y.
; In both runs the checked asm builds the 64-bit unsigned compare out of 32-bit
; pcmpgtd / pcmpeqd results combined via pshufd; the select is pand/pandn/por in
; the sse2 run and blendvpd in the sse4.1 run, followed by paddq.
847define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64> %y) {
848; SSE2-LABEL: unsigned_sat_variable_v2i64_using_min:
849; SSE2: # %bb.0:
850; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
851; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
852; SSE2-NEXT: movdqa %xmm0, %xmm4
853; SSE2-NEXT: pxor %xmm2, %xmm4
854; SSE2-NEXT: pxor %xmm3, %xmm2
855; SSE2-NEXT: pxor %xmm1, %xmm2
856; SSE2-NEXT: movdqa %xmm2, %xmm5
857; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
858; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
859; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
860; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
861; SSE2-NEXT: pand %xmm6, %xmm2
862; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
863; SSE2-NEXT: por %xmm2, %xmm4
864; SSE2-NEXT: pand %xmm4, %xmm0
865; SSE2-NEXT: pxor %xmm3, %xmm4
866; SSE2-NEXT: movdqa %xmm1, %xmm2
867; SSE2-NEXT: pandn %xmm4, %xmm2
868; SSE2-NEXT: por %xmm2, %xmm0
869; SSE2-NEXT: paddq %xmm1, %xmm0
870; SSE2-NEXT: retq
871;
872; SSE41-LABEL: unsigned_sat_variable_v2i64_using_min:
873; SSE41: # %bb.0:
874; SSE41-NEXT: movdqa %xmm0, %xmm2
875; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
876; SSE41-NEXT: pxor %xmm1, %xmm3
877; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
878; SSE41-NEXT: movdqa %xmm2, %xmm4
879; SSE41-NEXT: pxor %xmm0, %xmm4
880; SSE41-NEXT: pxor %xmm3, %xmm0
881; SSE41-NEXT: movdqa %xmm0, %xmm5
882; SSE41-NEXT: pcmpgtd %xmm4, %xmm5
883; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
884; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
885; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
886; SSE41-NEXT: pand %xmm6, %xmm4
887; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
888; SSE41-NEXT: por %xmm4, %xmm0
889; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm3
890; SSE41-NEXT: paddq %xmm1, %xmm3
891; SSE41-NEXT: movdqa %xmm3, %xmm0
892; SSE41-NEXT: retq
893 %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
894 %c = icmp ult <2 x i64> %x, %noty
895 %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %noty
896 %r = add <2 x i64> %s, %y
897 ret <2 x i64> %r
898}
899
; <2 x i64>, variable addend %y, "cmp with sum" form: %a = %x + %y, and each
; lane is -1 where %x is unsigned-greater than %a, otherwise the %a lane.
; The same asm is checked for both runs - paddq for the sum, a 64-bit unsigned
; compare built from 32-bit pcmpgtd / pcmpeqd + pshufd, and the mask OR-ed into
; the sum.
900define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
901; ANY-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
902; ANY: # %bb.0:
903; ANY-NEXT: paddq %xmm0, %xmm1
904; ANY-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
905; ANY-NEXT: pxor %xmm2, %xmm0
906; ANY-NEXT: pxor %xmm1, %xmm2
907; ANY-NEXT: movdqa %xmm0, %xmm3
908; ANY-NEXT: pcmpgtd %xmm2, %xmm3
909; ANY-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
910; ANY-NEXT: pcmpeqd %xmm0, %xmm2
911; ANY-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
912; ANY-NEXT: pand %xmm4, %xmm2
913; ANY-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
914; ANY-NEXT: por %xmm1, %xmm0
915; ANY-NEXT: por %xmm2, %xmm0
916; ANY-NEXT: retq
917 %a = add <2 x i64> %x, %y
918 %c = icmp ugt <2 x i64> %x, %a
919 %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
920 ret <2 x i64> %r
921}
922
; <2 x i64>, variable addend %y, "cmp with not" form: %noty = ~%y, %a = %x + %y,
; and each lane is -1 where %x is unsigned-greater than %noty, otherwise %a.
; The same asm is checked for both runs - paddq for the sum, a 64-bit unsigned
; compare built from 32-bit pcmpgtd / pcmpeqd + pshufd, and the mask OR-ed into
; the sum.
923define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2 x i64> %y) {
924; ANY-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
925; ANY: # %bb.0:
926; ANY-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
927; ANY-NEXT: pcmpeqd %xmm3, %xmm3
928; ANY-NEXT: movdqa %xmm0, %xmm4
929; ANY-NEXT: paddq %xmm1, %xmm4
930; ANY-NEXT: pxor %xmm2, %xmm0
931; ANY-NEXT: pxor %xmm2, %xmm3
932; ANY-NEXT: pxor %xmm1, %xmm3
933; ANY-NEXT: movdqa %xmm0, %xmm1
934; ANY-NEXT: pcmpgtd %xmm3, %xmm1
935; ANY-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
936; ANY-NEXT: pcmpeqd %xmm0, %xmm3
937; ANY-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
938; ANY-NEXT: pand %xmm2, %xmm3
939; ANY-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
940; ANY-NEXT: por %xmm4, %xmm0
941; ANY-NEXT: por %xmm3, %xmm0
942; ANY-NEXT: retq
943 %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
944 %a = add <2 x i64> %x, %y
945 %c = icmp ugt <2 x i64> %x, %noty
946 %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
947 ret <2 x i64> %r
948}
949