; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+xop | FileCheck %s -check-prefix=CHECK -check-prefix=XOP
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VBMI

;
; Verify the cost model for 2 src shuffles
;

; Two-source shuffles of double-precision vectors at 128, 256, 512 and 1024
; bits. Every mask selects elements from both operands; the expected cost for
; each subtarget is listed directly above the instruction it applies to.
; CHECK-LABEL: 'test_vXf64'
define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) {

  ; SSE2: cost of 1 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
  ; XOP: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0>

  ; SSE2: cost of 6 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 6 {{.*}} %V256 = shufflevector
  ; XOP: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>

  ; SSE2: cost of 28 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 28 {{.*}} %V512 = shufflevector
  ; XOP: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>

  ; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
  ; XOP: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
  ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}

; Two-source shuffles of 64-bit integer vectors at 128, 256, 512 and 1024
; bits. Every mask selects elements from both operands; the expected cost for
; each subtarget is listed directly above the instruction it applies to.
; CHECK-LABEL: 'test_vXi64'
define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) {

  ; SSE2: cost of 1 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
  ; XOP: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0>

  ; SSE2: cost of 6 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 6 {{.*}} %V256 = shufflevector
  ; XOP: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>

  ; SSE2: cost of 28 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 28 {{.*}} %V512 = shufflevector
  ; XOP: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>

  ; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
  ; XOP: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
  ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}

; Two-source shuffles of single-precision vectors at 128, 256, 512 and 1024
; bits. Every mask selects elements from both operands; the expected cost for
; each subtarget is listed directly above the instruction it applies to.
; CHECK-LABEL: 'test_vXf32'
define void @test_vXf32(<4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) {

  ; SSE2: cost of 2 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 2 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 2 {{.*}} %V128 = shufflevector
  ; XOP: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>

  ; SSE2: cost of 12 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
  ; XOP: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>

  ; SSE2: cost of 56 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 56 {{.*}} %V512 = shufflevector
  ; XOP: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
  ; XOP: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
  ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}

; Two-source shuffles of 32-bit integer vectors at 128, 256, 512 and 1024
; bits. Every mask selects elements from both operands; the expected cost for
; each subtarget is listed directly above the instruction it applies to.
; CHECK-LABEL: 'test_vXi32'
define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) {

  ; SSE2: cost of 2 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 2 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 2 {{.*}} %V128 = shufflevector
  ; XOP: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 2 {{.*}} %V128 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>

  ; SSE2: cost of 12 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
  ; XOP: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>

  ; SSE2: cost of 56 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 56 {{.*}} %V512 = shufflevector
  ; XOP: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 24 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
  ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
  ; XOP: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
  ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}

; Two-source shuffles of 16-bit integer vectors at 128, 256, 512 and 1024
; bits. Every mask selects elements from both operands. Unlike the wider
; element tests, the three AVX-512 configurations are checked individually
; because their expected costs differ.
; CHECK-LABEL: 'test_vXi16'
define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024, <8 x i16> %src128_1, <16 x i16> %src256_1, <32 x i16> %src512_1, <64 x i16> %src1024_1) {

  ; SSE2: cost of 8 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 3 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 3 {{.*}} %V128 = shufflevector
  ; XOP: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX512F: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX512BW: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX512VBMI: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>

  ; SSE2: cost of 48 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 18 {{.*}} %V256 = shufflevector
  ; XOP: cost of 9 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 15 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 7 {{.*}} %V256 = shufflevector
  ; AVX512F: cost of 7 {{.*}} %V256 = shufflevector
  ; AVX512BW: cost of 1 {{.*}} %V256 = shufflevector
  ; AVX512VBMI: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 224 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 84 {{.*}} %V512 = shufflevector
  ; XOP: cost of 54 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 90 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 42 {{.*}} %V512 = shufflevector
  ; AVX512F: cost of 42 {{.*}} %V512 = shufflevector
  ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
  ; AVX512VBMI: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 960 {{.*}} %V1024 = shufflevector
  ; SSSE3: cost of 360 {{.*}} %V1024 = shufflevector
  ; SSE42: cost of 360 {{.*}} %V1024 = shufflevector
  ; XOP: cost of 252 {{.*}} %V1024 = shufflevector
  ; AVX1: cost of 420 {{.*}} %V1024 = shufflevector
  ; AVX2: cost of 196 {{.*}} %V1024 = shufflevector
  ; AVX512F: cost of 196 {{.*}} %V1024 = shufflevector
  ; AVX512BW: cost of 6 {{.*}} %V1024 = shufflevector
  ; AVX512VBMI: cost of 6 {{.*}} %V1024 = shufflevector
  %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>

  ret void
}

; Two-source shuffles of 8-bit integer vectors at 128, 256 and 512 bits (this
; function takes no 1024-bit operands). Every mask selects elements from both
; operands, and the three AVX-512 configurations are checked individually
; because their expected costs differ.
; CHECK-LABEL: 'test_vXi8'
define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512, <16 x i8> %src128_1, <32 x i8> %src256_1, <64 x i8> %src512_1) {

  ; SSE2: cost of 13 {{.*}} %V128 = shufflevector
  ; SSSE3: cost of 3 {{.*}} %V128 = shufflevector
  ; SSE42: cost of 3 {{.*}} %V128 = shufflevector
  ; XOP: cost of 1 {{.*}} %V128 = shufflevector
  ; AVX1: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX2: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX512F: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX512BW: cost of 3 {{.*}} %V128 = shufflevector
  ; AVX512VBMI: cost of 1 {{.*}} %V128 = shufflevector
  %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 78 {{.*}} %V256 = shufflevector
  ; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
  ; SSE42: cost of 18 {{.*}} %V256 = shufflevector
  ; XOP: cost of 9 {{.*}} %V256 = shufflevector
  ; AVX1: cost of 15 {{.*}} %V256 = shufflevector
  ; AVX2: cost of 7 {{.*}} %V256 = shufflevector
  ; AVX512F: cost of 7 {{.*}} %V256 = shufflevector
  ; AVX512BW: cost of 3 {{.*}} %V256 = shufflevector
  ; AVX512VBMI: cost of 1 {{.*}} %V256 = shufflevector
  %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>

  ; SSE2: cost of 364 {{.*}} %V512 = shufflevector
  ; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
  ; SSE42: cost of 84 {{.*}} %V512 = shufflevector
  ; XOP: cost of 54 {{.*}} %V512 = shufflevector
  ; AVX1: cost of 90 {{.*}} %V512 = shufflevector
  ; AVX2: cost of 42 {{.*}} %V512 = shufflevector
  ; AVX512F: cost of 42 {{.*}} %V512 = shufflevector
  ; AVX512BW: cost of 19 {{.*}} %V512 = shufflevector
  ; AVX512VBMI: cost of 1 {{.*}} %V512 = shufflevector
  %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

  ret void
}