; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Verify the cost model for single-source shuffles
;

; Cost-model expectations for single-source shuffles (second operand undef)
; of double-precision vectors: <2 x double>, <4 x double>, <8 x double> and
; <16 x double>. Each shufflevector is preceded by the cost expected for
; every subtarget prefix enabled by the RUN lines.
; CHECK-LABEL: 'test_vXf64'
define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024) {

  ; SSE2: cost of 2 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 2 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>

  ; SSE2: cost of 4 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 4 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 6 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 6 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>

  ; SSE2: cost of 24 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 24 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 12 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 12 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 112 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 112 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 72 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 72 {{.*}} %V1024 = shufflevector
  ; AVX512: cost of 2 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}
50
; Cost-model expectations for single-source shuffles (second operand undef)
; of 64-bit integer vectors: <2 x i64>, <4 x i64> and <8 x i64>. Each
; shufflevector is preceded by the cost expected for every subtarget prefix
; enabled by the RUN lines.
; CHECK-LABEL: 'test_vXi64'
define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) {

  ; SSE2: cost of 1 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>

  ; SSE2: cost of 8 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 8 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 8 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 8 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>

  ; SSE2: cost of 48 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 48 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 48 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 16 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 16 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}
80
; Cost-model expectations for single-source shuffles (second operand undef)
; of single-precision vectors: <4 x float>, <8 x float> and <16 x float>.
; Each shufflevector is preceded by the cost expected for every subtarget
; prefix enabled by the RUN lines.
; CHECK-LABEL: 'test_vXf32'
define void @test_vXf32(<4 x float> %src128, <8 x float> %src256, <16 x float> %src512) {

  ; SSE2: cost of 6 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 6 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 6 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 6 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 6 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>

  ; SSE2: cost of 12 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 14 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 14 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 72 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 72 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 72 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 28 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 28 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}
110
; Cost-model expectations for single-source shuffles (second operand undef)
; of 32-bit integer vectors: <4 x i32>, <8 x i32>, <16 x i32> and <32 x i32>.
; Each shufflevector is preceded by the cost expected for every subtarget
; prefix enabled by the RUN lines.
; CHECK-LABEL: 'test_vXi32'
define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024) {

  ; SSE2: cost of 1 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>

  ; SSE2: cost of 16 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 16 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 16 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 16 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 96 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 96 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 96 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 32 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 32 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 448 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 448 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 448 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 192 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 192 {{.*}} %V1024 = shufflevector
  ; AVX512: cost of 2 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret void
}
147
; Cost-model expectations for single-source shuffles (second operand undef)
; of 16-bit integer vectors: <8 x i16>, <16 x i16>, <32 x i16> and
; <64 x i16>. Each shufflevector is preceded by the cost expected for every
; subtarget prefix enabled by the RUN lines; the two AVX-512 configurations
; are checked separately here because their expected costs differ.
; CHECK-LABEL: 'test_vXi16'
define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024) {

  ; SSE2: cost of 16 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX512F: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX512BW: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 32 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 32 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 32 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 32 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX512F: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX512BW: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 192 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 192 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 192 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 64 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 64 {{.*}} %V512 = shufflevector
  ; AVX512F: cost of 64 {{.*}} %V512 = shufflevector
  ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 896 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 896 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 896 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 384 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 384 {{.*}} %V1024 = shufflevector
  ; AVX512F: cost of 384 {{.*}} %V1024 = shufflevector
  ; AVX512BW: cost of 2 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret void
}
188
; Cost-model expectations for single-source shuffles (second operand undef)
; of 8-bit integer vectors: <16 x i8>, <32 x i8> and <64 x i8>. Each
; shufflevector is preceded by the cost expected for every subtarget prefix
; enabled by the RUN lines; the 256-bit and 512-bit cases check the two
; AVX-512 configurations separately because their expected costs differ.
; CHECK-LABEL: 'test_vXi8'
define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) {
  ; SSE2: cost of 32 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 64 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 64 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 64 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 64 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX512F: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX512BW: cost of 3 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 384 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 384 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 384 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 128 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 128 {{.*}} %V512 = shufflevector
  ; AVX512F: cost of 128 {{.*}} %V512 = shufflevector
  ; AVX512BW: cost of 8 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}