; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Verify the cost model for 1 src shuffles
;

; Single-source shuffles of double vectors: 4, 8 and 16 elements.
; Each shufflevector takes undef as its second operand and uses a mask that is
; mostly descending with at least one repeated index. The directives pin the
; cost reported for each instruction under every RUN configuration.
; NOTE(review): the label below is only enabled for the AVX-512 runs, whereas
; the f32/i32/i16/i8 functions use the common prefix for their label - confirm
; whether that difference is intentional.
; AVX512-LABEL: 'test_vXf64'
define void @test_vXf64(<4 x double> %src256, <8 x double> %src512, <16 x double> %src1024) {
  ; 4 x double
  ; SSE2: cost of 4 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 4 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 6 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 6 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>

  ; 8 x double
  ; SSE2: cost of 24 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 24 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 12 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 12 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; 16 x double
  ; SSE2: cost of 112 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 112 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 72 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 72 {{.*}} %V1024 = shufflevector
  ; AVX512: cost of 2 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}

; Single-source shuffles of i64 vectors: 4 and 8 elements.
; Each shufflevector takes undef as its second operand and uses a mask that is
; mostly descending with one repeated index. The directives pin the cost
; reported for each instruction under every RUN configuration.
; NOTE(review): the label below is only enabled for the AVX-512 runs, whereas
; the f32/i32/i16/i8 functions use the common prefix for their label - confirm
; whether that difference is intentional.
; AVX512-LABEL: 'test_vXi64'
define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) {

  ; 4 x i64
  ; SSE2: cost of 8 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 8 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 8 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 8 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 8 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>

  ; 8 x i64
  ; SSE2: cost of 48 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 48 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 48 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 16 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 16 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}

; Single-source shuffles of float vectors: 4, 8 and 16 elements.
; Each shufflevector takes undef as its second operand and uses a mask that is
; mostly descending with at least one repeated index. The directives pin the
; cost reported for each instruction under every RUN configuration.
; CHECK-LABEL: 'test_vXf32'
define void @test_vXf32(<4 x float> %src128, <8 x float> %src256, <16 x float> %src512) {

  ; 4 x float
  ; SSE2: cost of 6 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 6 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 6 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 6 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 6 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>

  ; 8 x float
  ; SSE2: cost of 12 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 14 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 14 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; 16 x float
  ; SSE2: cost of 72 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 72 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 72 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 28 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 28 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}

; Single-source shuffles of i32 vectors: 4, 8, 16 and 32 elements.
; Each shufflevector takes undef as its second operand and uses a mask that is
; mostly descending with a few repeated or out-of-order indices. The directives
; pin the cost reported for each instruction under every RUN configuration.
; CHECK-LABEL: 'test_vXi32'
define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024) {

  ; 4 x i32
  ; SSE2: cost of 8 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 8 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 8 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 8 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 8 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>

  ; 8 x i32
  ; SSE2: cost of 16 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 16 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 16 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 16 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 16 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>

  ; 16 x i32
  ; SSE2: cost of 96 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 96 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 96 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 32 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 32 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; 32 x i32
  ; SSE2: cost of 448 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 448 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 448 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 192 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 192 {{.*}} %V1024 = shufflevector
  ; AVX512: cost of 2 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret void
}

; Single-source shuffles of i16 vectors: 8, 16, 32 and 64 elements.
; Each shufflevector takes undef as its second operand and uses a mask that is
; mostly descending with a few repeated or out-of-order indices. The directives
; pin the cost reported for each instruction under every RUN configuration.
; The expected i16 costs differ between the two AVX-512 runs, so those runs are
; checked through their per-run prefixes instead of the shared one.
; CHECK-LABEL: 'test_vXi16'
define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024) {

  ; 8 x i16
  ; SSE2: cost of 16 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 16 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 16 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 16 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 16 {{.*}} %V128 = shufflevector
  ; AVX512F: cost of 16 {{.*}} %V128 = shufflevector
  ; AVX512BW: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; 16 x i16
  ; SSE2: cost of 32 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 32 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 32 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 32 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 32 {{.*}} %V256 = shufflevector
  ; AVX512F: cost of 32 {{.*}} %V256 = shufflevector
  ; AVX512BW: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; 32 x i16
  ; SSE2: cost of 192 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 192 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 192 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 64 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 64 {{.*}} %V512 = shufflevector
  ; AVX512F: cost of 64 {{.*}} %V512 = shufflevector
  ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; 64 x i16
  ; SSE2: cost of 896 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 896 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 896 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 384 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 384 {{.*}} %V1024 = shufflevector
  ; AVX512F: cost of 384 {{.*}} %V1024 = shufflevector
  ; AVX512BW: cost of 2 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret void
}

; Single-source shuffles of i8 vectors: 16, 32 and 64 elements.
; Each shufflevector takes undef as its second operand and uses a mask that is
; mostly descending with a few repeated or out-of-order indices. The directives
; pin the cost reported for each instruction under every RUN configuration.
; The 16-element case expects the same cost from both AVX-512 runs and uses
; their shared prefix; the wider cases are checked per run.
; CHECK-LABEL: 'test_vXi8'
define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) {
  ; 16 x i8
  ; SSE2: cost of 32 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 32 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 32 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 32 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 32 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; 32 x i8
  ; SSE2: cost of 64 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 64 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 64 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 64 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 64 {{.*}} %V256 = shufflevector
  ; AVX512F: cost of 64 {{.*}} %V256 = shufflevector
  ; AVX512BW: cost of 3 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; 64 x i8
  ; SSE2: cost of 384 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 384 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 384 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 128 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 128 {{.*}} %V512 = shufflevector
  ; AVX512F: cost of 128 {{.*}} %V512 = shufflevector
  ; AVX512BW: cost of 8 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}