; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s

declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)

; The pshufb zeroes bytes 0-7 of each 128-bit lane (mask indices >= 128),
; and the shufflevector keeps only those zeroed bytes (or zeroinitializer
; elements), so the whole sequence must fold to an all-zeros vector.
define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <32 x i8> %2
}

; Mirror of combine_pshufb_pslldq for the right-shift pattern: the pshufb
; zeroes bytes 8-15 of each lane and the shufflevector selects exactly those
; zeroed bytes (or zeroinitializer elements), so the result folds to zero.
define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  ret <32 x i8> %2
}

; A vpermd followed by a byte shuffle should combine into a single vpshufb.
define <32 x i8> @combine_pshufb_vpermd(<8 x i32> %a) {
; CHECK-LABEL: combine_pshufb_vpermd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x i32> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

; Float variant of combine_pshufb_vpermd: vpermps + byte shuffle should
; also combine into a single vpshufb.
define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) {
; CHECK-LABEL: combine_pshufb_vpermps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x float> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

; The pshufb here only swaps the two qwords within each lane, so it should
; lower to a vpshufd rather than a byte shuffle, following the vpermq.
define <4 x i64> @combine_permq_pshufb(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; CHECK-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  ret <4 x i64> %4
}

; pshufb with an all-zero mask replicates byte 0 -> vpbroadcastb.
define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

; 256-bit variant: zero-mask pshufb (per-lane byte 0 splat) followed by a
; zero-mask vpermd (lane 0 splat) together replicate byte 0 of the source
; across the whole ymm -> a single vpbroadcastb from the xmm input.
define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> zeroinitializer)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

; pshufb with a repeating <0,1> byte mask replicates word 0 -> vpbroadcastw.
define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  ret <16 x i8> %1
}

; 256-bit variant: per-lane word-0 splat via pshufb plus a zero-mask vpermd
; (lane 0 splat) replicate word 0 everywhere -> a single vpbroadcastw.
define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

; pshufb with a repeating <0,1,2,3> byte mask replicates dword 0 ->
; vpbroadcastd. The trailing add keeps the broadcast from being folded
; into the constant-pool load of the addend.
define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastd128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0
; CHECK-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %2 = add <16 x i8> %1, <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>
  ret <16 x i8> %2
}

; vpermd with an all-zero index vector replicates element 0 -> vpbroadcastd.
define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> zeroinitializer)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

; pshufb with a repeating <0..7> byte mask replicates qword 0 -> vpbroadcastq.
define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastq128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

; vpermd with a repeating <0,1> index pattern replicates qword 0 ->
; vpbroadcastq.
define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastq256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

; Float-typed version of the dword splat: the pshufb on the bitcast bytes
; should become vbroadcastss (FP domain) rather than vpbroadcastd.
define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastss128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = bitcast <4 x float> %a to <16 x i8>
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  ret <4 x float> %3
}

; vpermps with an all-zero index vector replicates element 0 -> vbroadcastss.
define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastss256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer)
  ret <8 x float> %2
}

; vpermps with a repeating <0,1> index pattern replicates the low 64 bits
; -> vbroadcastsd.
define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastsd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x double> %1 to <8 x float>
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %4 = bitcast <8 x float> %3 to <4 x double>
  ret <4 x double> %4
}