; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ANY,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ANY,SSE,SSE41

; There are at least 3 potential patterns corresponding to an unsigned saturated add: min, cmp with sum, cmp with not.
; Test each of those patterns with i8/i16/i32/i64.
; Test each of those with a constant operand and a variable operand.
; Test each of those with a 128-bit vector type.
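;
; Illustrative sketch of the three patterns (C-like pseudocode, not part of the
; checked output): for an input x and an addend C (42 here, or the variable y),
; with ~C meaning bitwise-not and ALL_ONES the saturated result, and all
; comparisons unsigned:
;   min:        s = (x < ~C) ? x : ~C;  r = s + C;
;   cmp_sum:    a = x + C;              r = (x > a)  ? ALL_ONES : a;
;   cmp_notval: a = x + C;              r = (x > ~C) ? ALL_ONES : a;
; Note that -43 in the constant tests is ~42 for every integer width.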

define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_min:
; ANY: # %bb.0:
; ANY-NEXT: movl %edi, %eax
; ANY-NEXT: cmpb $-43, %al
; ANY-NEXT: jb .LBB0_2
; ANY-NEXT: # %bb.1:
; ANY-NEXT: movb $-43, %al
; ANY-NEXT: .LBB0_2:
; ANY-NEXT: addb $42, %al
; ANY-NEXT: # kill: def $al killed $al killed $eax
; ANY-NEXT: retq
  %c = icmp ult i8 %x, -43
  %s = select i1 %c, i8 %x, i8 -43
  %r = add i8 %s, 42
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: addb $42, %dil
; ANY-NEXT: movb $-1, %al
; ANY-NEXT: jb .LBB1_2
; ANY-NEXT: # %bb.1:
; ANY-NEXT: movl %edi, %eax
; ANY-NEXT: .LBB1_2:
; ANY-NEXT: retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: cmpb $-43, %dil
; ANY-NEXT: movb $-1, %al
; ANY-NEXT: ja .LBB2_2
; ANY-NEXT: # %bb.1:
; ANY-NEXT: addb $42, %dil
; ANY-NEXT: movl %edi, %eax
; ANY-NEXT: .LBB2_2:
; ANY-NEXT: retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, -43
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_min:
; ANY: # %bb.0:
; ANY-NEXT: movzwl %di, %eax
; ANY-NEXT: cmpl $65493, %eax # imm = 0xFFD5
; ANY-NEXT: movl $65493, %eax # imm = 0xFFD5
; ANY-NEXT: cmovbl %edi, %eax
; ANY-NEXT: addl $42, %eax
; ANY-NEXT: # kill: def $ax killed $ax killed $eax
; ANY-NEXT: retq
  %c = icmp ult i16 %x, -43
  %s = select i1 %c, i16 %x, i16 -43
  %r = add i16 %s, 42
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: addw $42, %di
; ANY-NEXT: movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT: cmovael %edi, %eax
; ANY-NEXT: # kill: def $ax killed $ax killed $eax
; ANY-NEXT: retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: # kill: def $edi killed $edi def $rdi
; ANY-NEXT: leal 42(%rdi), %ecx
; ANY-NEXT: movzwl %di, %eax
; ANY-NEXT: cmpl $65493, %eax # imm = 0xFFD5
; ANY-NEXT: movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT: cmovbel %ecx, %eax
; ANY-NEXT: # kill: def $ax killed $ax killed $eax
; ANY-NEXT: retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, -43
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_min:
; ANY: # %bb.0:
; ANY-NEXT: cmpl $-43, %edi
; ANY-NEXT: movl $-43, %eax
; ANY-NEXT: cmovbl %edi, %eax
; ANY-NEXT: addl $42, %eax
; ANY-NEXT: retq
  %c = icmp ult i32 %x, -43
  %s = select i1 %c, i32 %x, i32 -43
  %r = add i32 %s, 42
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: addl $42, %edi
; ANY-NEXT: movl $-1, %eax
; ANY-NEXT: cmovael %edi, %eax
; ANY-NEXT: retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: # kill: def $edi killed $edi def $rdi
; ANY-NEXT: leal 42(%rdi), %ecx
; ANY-NEXT: cmpl $-43, %edi
; ANY-NEXT: movl $-1, %eax
; ANY-NEXT: cmovbel %ecx, %eax
; ANY-NEXT: retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, -43
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_min:
; ANY: # %bb.0:
; ANY-NEXT: cmpq $-43, %rdi
; ANY-NEXT: movq $-43, %rax
; ANY-NEXT: cmovbq %rdi, %rax
; ANY-NEXT: addq $42, %rax
; ANY-NEXT: retq
  %c = icmp ult i64 %x, -43
  %s = select i1 %c, i64 %x, i64 -43
  %r = add i64 %s, 42
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: addq $42, %rdi
; ANY-NEXT: movq $-1, %rax
; ANY-NEXT: cmovaeq %rdi, %rax
; ANY-NEXT: retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: cmpq $-43, %rdi
; ANY-NEXT: leaq 42(%rdi), %rax
; ANY-NEXT: movq $-1, %rcx
; ANY-NEXT: cmovaq %rcx, %rax
; ANY-NEXT: retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, -43
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_min:
; ANY: # %bb.0:
; ANY-NEXT: movl %edi, %eax
; ANY-NEXT: movl %esi, %ecx
; ANY-NEXT: notb %cl
; ANY-NEXT: cmpb %cl, %al
; ANY-NEXT: jb .LBB12_2
; ANY-NEXT: # %bb.1:
; ANY-NEXT: movl %ecx, %eax
; ANY-NEXT: .LBB12_2:
; ANY-NEXT: addb %sil, %al
; ANY-NEXT: # kill: def $al killed $al killed $eax
; ANY-NEXT: retq
  %noty = xor i8 %y, -1
  %c = icmp ult i8 %x, %noty
  %s = select i1 %c, i8 %x, i8 %noty
  %r = add i8 %s, %y
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: addb %sil, %dil
; ANY-NEXT: movb $-1, %al
; ANY-NEXT: jb .LBB13_2
; ANY-NEXT: # %bb.1:
; ANY-NEXT: movl %edi, %eax
; ANY-NEXT: .LBB13_2:
; ANY-NEXT: retq
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: movl %esi, %eax
; ANY-NEXT: notb %al
; ANY-NEXT: cmpb %al, %dil
; ANY-NEXT: movb $-1, %al
; ANY-NEXT: ja .LBB14_2
; ANY-NEXT: # %bb.1:
; ANY-NEXT: addb %sil, %dil
; ANY-NEXT: movl %edi, %eax
; ANY-NEXT: .LBB14_2:
; ANY-NEXT: retq
  %noty = xor i8 %y, -1
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %noty
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_min:
; ANY: # %bb.0:
; ANY-NEXT: # kill: def $esi killed $esi def $rsi
; ANY-NEXT: movl %esi, %eax
; ANY-NEXT: notl %eax
; ANY-NEXT: cmpw %ax, %di
; ANY-NEXT: cmovbl %edi, %eax
; ANY-NEXT: leal (%rax,%rsi), %eax
; ANY-NEXT: # kill: def $ax killed $ax killed $eax
; ANY-NEXT: retq
  %noty = xor i16 %y, -1
  %c = icmp ult i16 %x, %noty
  %s = select i1 %c, i16 %x, i16 %noty
  %r = add i16 %s, %y
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: addw %si, %di
; ANY-NEXT: movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT: cmovael %edi, %eax
; ANY-NEXT: # kill: def $ax killed $ax killed $eax
; ANY-NEXT: retq
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: # kill: def $esi killed $esi def $rsi
; ANY-NEXT: # kill: def $edi killed $edi def $rdi
; ANY-NEXT: leal (%rdi,%rsi), %ecx
; ANY-NEXT: notl %esi
; ANY-NEXT: cmpw %si, %di
; ANY-NEXT: movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT: cmovbel %ecx, %eax
; ANY-NEXT: # kill: def $ax killed $ax killed $eax
; ANY-NEXT: retq
  %noty = xor i16 %y, -1
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %noty
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_variable_i32_using_min(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_min:
; ANY: # %bb.0:
; ANY-NEXT: # kill: def $esi killed $esi def $rsi
; ANY-NEXT: movl %esi, %eax
; ANY-NEXT: notl %eax
; ANY-NEXT: cmpl %eax, %edi
; ANY-NEXT: cmovbl %edi, %eax
; ANY-NEXT: leal (%rax,%rsi), %eax
; ANY-NEXT: retq
  %noty = xor i32 %y, -1
  %c = icmp ult i32 %x, %noty
  %s = select i1 %c, i32 %x, i32 %noty
  %r = add i32 %s, %y
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: addl %esi, %edi
; ANY-NEXT: movl $-1, %eax
; ANY-NEXT: cmovael %edi, %eax
; ANY-NEXT: retq
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: # kill: def $esi killed $esi def $rsi
; ANY-NEXT: # kill: def $edi killed $edi def $rdi
; ANY-NEXT: leal (%rdi,%rsi), %ecx
; ANY-NEXT: notl %esi
; ANY-NEXT: cmpl %esi, %edi
; ANY-NEXT: movl $-1, %eax
; ANY-NEXT: cmovbel %ecx, %eax
; ANY-NEXT: retq
  %noty = xor i32 %y, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %noty
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_min:
; ANY: # %bb.0:
; ANY-NEXT: movq %rsi, %rax
; ANY-NEXT: notq %rax
; ANY-NEXT: cmpq %rax, %rdi
; ANY-NEXT: cmovbq %rdi, %rax
; ANY-NEXT: leaq (%rax,%rsi), %rax
; ANY-NEXT: retq
  %noty = xor i64 %y, -1
  %c = icmp ult i64 %x, %noty
  %s = select i1 %c, i64 %x, i64 %noty
  %r = add i64 %s, %y
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: addq %rsi, %rdi
; ANY-NEXT: movq $-1, %rax
; ANY-NEXT: cmovaeq %rdi, %rax
; ANY-NEXT: retq
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: leaq (%rdi,%rsi), %rcx
; ANY-NEXT: notq %rsi
; ANY-NEXT: cmpq %rsi, %rdi
; ANY-NEXT: movq $-1, %rax
; ANY-NEXT: cmovbeq %rcx, %rax
; ANY-NEXT: retq
  %noty = xor i64 %y, -1
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %noty
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
; ANY-LABEL: unsigned_sat_constant_v16i8_using_min:
; ANY: # %bb.0:
; ANY-NEXT: pminub {{.*}}(%rip), %xmm0
; ANY-NEXT: paddb {{.*}}(%rip), %xmm0
; ANY-NEXT: retq
  %c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = add <16 x i8> %s, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; ANY-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: paddusb {{.*}}(%rip), %xmm0
; ANY-NEXT: retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
; ANY-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: paddusb {{.*}}(%rip), %xmm0
; ANY-NEXT: retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
; SSE2-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: pminsw {{.*}}(%rip), %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: paddw {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE41: # %bb.0:
; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0
; SSE41-NEXT: paddw {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
  %c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = add <8 x i16> %s, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; ANY-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: paddusw {{.*}}(%rip), %xmm0
; ANY-NEXT: retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
; ANY-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: paddusw {{.*}}(%rip), %xmm0
; ANY-NEXT: retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483605,2147483605,2147483605,2147483605]
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE41: # %bb.0:
; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0
; SSE41-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
  %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = add <4 x i32> %s, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [42,42,42,42]
; SSE41-NEXT: paddd %xmm0, %xmm2
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pminud %xmm2, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: por %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE41-NEXT: paddd %xmm0, %xmm1
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; SSE41-NEXT: pminud %xmm0, %xmm2
; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: paddq {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [18446744073709551573,18446744073709551573]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372034707292117,9223372034707292117]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm5, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: paddq {{.*}}(%rip), %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
  %c = icmp ult <2 x i64> %x, <i64 -43, i64 -43>
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> <i64 -43, i64 -43>
  %r = add <2 x i64> %s, <i64 42, i64 42>
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; ANY-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: movdqa {{.*#+}} xmm1 = [42,42]
; ANY-NEXT: paddq %xmm0, %xmm1
; ANY-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; ANY-NEXT: pxor %xmm2, %xmm0
; ANY-NEXT: pxor %xmm1, %xmm2
; ANY-NEXT: movdqa %xmm0, %xmm3
; ANY-NEXT: pcmpgtd %xmm2, %xmm3
; ANY-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; ANY-NEXT: pcmpeqd %xmm0, %xmm2
; ANY-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; ANY-NEXT: pand %xmm4, %xmm2
; ANY-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; ANY-NEXT: por %xmm1, %xmm0
; ANY-NEXT: por %xmm2, %xmm0
; ANY-NEXT: retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
; ANY-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: movdqa {{.*#+}} xmm1 = [42,42]
; ANY-NEXT: paddq %xmm0, %xmm1
; ANY-NEXT: pxor {{.*}}(%rip), %xmm0
; ANY-NEXT: movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; ANY-NEXT: movdqa %xmm0, %xmm3
; ANY-NEXT: pcmpgtd %xmm2, %xmm3
; ANY-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; ANY-NEXT: pcmpeqd %xmm2, %xmm0
; ANY-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; ANY-NEXT: pand %xmm4, %xmm2
; ANY-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; ANY-NEXT: por %xmm1, %xmm0
; ANY-NEXT: por %xmm2, %xmm0
; ANY-NEXT: retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8> %y) {
; ANY-LABEL: unsigned_sat_variable_v16i8_using_min:
; ANY: # %bb.0:
; ANY-NEXT: pcmpeqd %xmm2, %xmm2
; ANY-NEXT: pxor %xmm1, %xmm2
; ANY-NEXT: pminub %xmm2, %xmm0
; ANY-NEXT: paddb %xmm1, %xmm0
; ANY-NEXT: retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %c = icmp ult <16 x i8> %x, %noty
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %noty
  %r = add <16 x i8> %s, %y
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
; ANY-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: paddusb %xmm1, %xmm0
; ANY-NEXT: retq
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16 x i8> %y) {
; ANY-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: pcmpeqd %xmm2, %xmm2
; ANY-NEXT: movdqa %xmm0, %xmm3
; ANY-NEXT: paddb %xmm1, %xmm3
; ANY-NEXT: pxor %xmm2, %xmm1
; ANY-NEXT: pminub %xmm0, %xmm1
; ANY-NEXT: pcmpeqb %xmm1, %xmm0
; ANY-NEXT: pxor %xmm2, %xmm0
; ANY-NEXT: por %xmm3, %xmm0
; ANY-NEXT: retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %noty
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT: pxor %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm3, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: pminsw %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm3, %xmm0
; SSE2-NEXT: paddw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: pxor %xmm1, %xmm2
; SSE41-NEXT: pminuw %xmm2, %xmm0
; SSE41-NEXT: paddw %xmm1, %xmm0
; SSE41-NEXT: retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %c = icmp ult <8 x i16> %x, %noty
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %noty
  %r = add <8 x i16> %s, %y
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
; ANY-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: paddusw %xmm1, %xmm0
; ANY-NEXT: retq
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: paddw %xmm1, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: pxor %xmm4, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: paddw %xmm1, %xmm3
; SSE41-NEXT: pxor %xmm2, %xmm1
; SSE41-NEXT: pminuw %xmm0, %xmm1
; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %noty
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pxor %xmm3, %xmm4
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm1, %xmm3
; SSE2-NEXT: pcmpgtd %xmm4, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pandn %xmm3, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: pxor %xmm1, %xmm2
; SSE41-NEXT: pminud %xmm2, %xmm0
; SSE41-NEXT: paddd %xmm1, %xmm0
; SSE41-NEXT: retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = icmp ult <4 x i32> %x, %noty
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %noty
  %r = add <4 x i32> %s, %y
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE2: # %bb.0:
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE41: # %bb.0:
; SSE41-NEXT: paddd %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pminud %xmm1, %xmm2
; SSE41-NEXT: pcmpeqd %xmm0, %xmm2
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm2
; SSE41-NEXT: por %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: paddd %xmm1, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: pxor %xmm4, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: paddd %xmm1, %xmm3
; SSE41-NEXT: pxor %xmm2, %xmm1
; SSE41-NEXT: pminud %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %noty
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pxor %xmm2, %xmm4
; SSE2-NEXT: pxor %xmm3, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm5
; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT: pand %xmm6, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT: por %xmm2, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pxor %xmm3, %xmm4
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pandn %xmm4, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: paddq %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
; SSE41-NEXT: pxor %xmm1, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pxor %xmm0, %xmm4
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: pcmpgtd %xmm4, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm6, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm3
; SSE41-NEXT: paddq %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %c = icmp ult <2 x i64> %x, %noty
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %noty
  %r = add <2 x i64> %s, %y
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
; ANY-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; ANY: # %bb.0:
; ANY-NEXT: paddq %xmm0, %xmm1
; ANY-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; ANY-NEXT: pxor %xmm2, %xmm0
; ANY-NEXT: pxor %xmm1, %xmm2
; ANY-NEXT: movdqa %xmm0, %xmm3
; ANY-NEXT: pcmpgtd %xmm2, %xmm3
; ANY-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; ANY-NEXT: pcmpeqd %xmm0, %xmm2
; ANY-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; ANY-NEXT: pand %xmm4, %xmm2
; ANY-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; ANY-NEXT: por %xmm1, %xmm0
; ANY-NEXT: por %xmm2, %xmm0
; ANY-NEXT: retq
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2 x i64> %y) {
; ANY-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; ANY: # %bb.0:
; ANY-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; ANY-NEXT: pcmpeqd %xmm3, %xmm3
; ANY-NEXT: movdqa %xmm0, %xmm4
; ANY-NEXT: paddq %xmm1, %xmm4
; ANY-NEXT: pxor %xmm2, %xmm0
; ANY-NEXT: pxor %xmm2, %xmm3
; ANY-NEXT: pxor %xmm1, %xmm3
; ANY-NEXT: movdqa %xmm0, %xmm1
; ANY-NEXT: pcmpgtd %xmm3, %xmm1
; ANY-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
; ANY-NEXT: pcmpeqd %xmm0, %xmm3
; ANY-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; ANY-NEXT: pand %xmm2, %xmm3
; ANY-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; ANY-NEXT: por %xmm4, %xmm0
; ANY-NEXT: por %xmm3, %xmm0
; ANY-NEXT: retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %noty
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}