; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL

; fold (abs c1) -> c2
; The pabs of a constant vector is folded to a constant load (note
; 2147483648 = abs(INT32_MIN) wraps back to INT32_MIN's bit pattern).
define <4 x i32> @combine_v4i32_abs_constant() {
; CHECK-LABEL: combine_v4i32_abs_constant:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [0,1,3,2147483648]
; CHECK-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> <i32 0, i32 -1, i32 3, i32 -2147483648>)
  ret <4 x i32> %1
}

; pabs of an all-constant <16 x i16> folds to a constant load; note the
; i16 inputs truncate (65536 -> 0) and abs(-32768) wraps to 32768.
define <16 x i16> @combine_v16i16_abs_constant() {
; CHECK-LABEL: combine_v16i16_abs_constant:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps {{.*#+}} ymm0 = [0,1,1,3,3,7,7,255,255,4096,4096,32767,32767,32768,32768,0]
; CHECK-NEXT:    retq
  %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> <i16 0, i16 1, i16 -1, i16 3, i16 -3, i16 7, i16 -7, i16 255, i16 -255, i16 4096, i16 -4096, i16 32767, i16 -32767, i16 -32768, i16 32768, i16 65536>)
  ret <16 x i16> %1
}

; fold (abs (abs x)) -> (abs x)
; The ashr/add/xor sequence is the generic abs idiom; applied on top of an
; existing pabs it should combine away, leaving a single vpabsw.
define <8 x i16> @combine_v8i16_abs_abs(<8 x i16> %a) {
; CHECK-LABEL: combine_v8i16_abs_abs:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpabsw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a)
  %s2 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %a2 = add <8 x i16> %a1, %s2
  %x2 = xor <8 x i16> %a2, %s2
  ret <8 x i16> %x2
}

; The sub/icmp/select abs idiom feeding a pabs intrinsic should collapse
; to a single vpabsb.
define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) {
; CHECK-LABEL: combine_v32i8_abs_abs:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpabsb %ymm0, %ymm0
; CHECK-NEXT:    retq
  %n1 = sub <32 x i8> zeroinitializer, %a
  %b1 = icmp slt <32 x i8> %a, zeroinitializer
  %a1 = select <32 x i1> %b1, <32 x i8> %n1, <32 x i8> %a
  %a2 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a1)
  ret <32 x i8> %a2
}

; Two stacked sub/icmp/select abs idioms on <4 x i64>.  AVX2 has no vpabsq,
; so it still emits both compare/add/xor sequences; AVX512F widens to zmm
; for vpabsq, and AVX512VL uses the ymm form directly.
define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) {
; AVX2-LABEL: combine_v4i64_abs_abs:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm1
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: combine_v4i64_abs_abs:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    vpabsq %zmm0, %zmm0
; AVX512F-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: combine_v4i64_abs_abs:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpabsq %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %n1 = sub <4 x i64> zeroinitializer, %a
  %b1 = icmp slt <4 x i64> %a, zeroinitializer
  %a1 = select <4 x i1> %b1, <4 x i64> %n1, <4 x i64> %a
  %n2 = sub <4 x i64> zeroinitializer, %a1
  %b2 = icmp sgt <4 x i64> %a1, zeroinitializer
  %a2 = select <4 x i1> %b2, <4 x i64> %a1, <4 x i64> %n2
  ret <4 x i64> %a2
}

; fold (abs x) -> x iff not-negative
; After masking with 15 every lane is non-negative, so the pabs is dropped
; and only the and-with-memory-operand remains.
define <16 x i8> @combine_v16i8_abs_constant(<16 x i8> %a) {
; AVX2-LABEL: combine_v16i8_abs_constant:
; AVX2:       # BB#0:
; AVX2-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: combine_v16i8_abs_constant:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: combine_v16i8_abs_constant:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT:    retq
  %1 = insertelement <16 x i8> undef, i8 15, i32 0
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
  %3 = and <16 x i8> %a, %2
  %4 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %3)
  ret <16 x i8> %4
}

; After a logical shift right by 1 the sign bit of every lane is clear, so
; the pabs is known to be a no-op and only the vpsrld survives.
define <8 x i32> @combine_v8i32_abs_pos(<8 x i32> %a) {
; CHECK-LABEL: combine_v8i32_abs_pos:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrld $1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = lshr <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %2 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %1)
  ret <8 x i32> %2
}

; SSSE3/AVX2 packed-absolute-value intrinsic declarations used above.
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone

declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone