; blob: 371746a5e163e0a949cc01d119c61a5aee3cac18 [file] [log] [blame]
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; Module-level data layout and target triple for the functions in this file.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Cost-model expectations for vector fadd on <4/8/16 x float> and
; <2/4/8 x double>. All operands are undef; each check below only matches
; the reported cost together with the result name and opcode.
; CHECK-LABEL: 'fadd'
define i32 @fadd(i32 %arg) {
  ; SSSE3: cost of 2 {{.*}} %A = fadd
  ; SSE42: cost of 2 {{.*}} %A = fadd
  ; AVX: cost of 2 {{.*}} %A = fadd
  ; AVX2: cost of 2 {{.*}} %A = fadd
  ; AVX512: cost of 2 {{.*}} %A = fadd
  %A = fadd <4 x float> undef, undef
  ; SSSE3: cost of 4 {{.*}} %B = fadd
  ; SSE42: cost of 4 {{.*}} %B = fadd
  ; AVX: cost of 2 {{.*}} %B = fadd
  ; AVX2: cost of 2 {{.*}} %B = fadd
  ; AVX512: cost of 2 {{.*}} %B = fadd
  %B = fadd <8 x float> undef, undef
  ; SSSE3: cost of 8 {{.*}} %C = fadd
  ; SSE42: cost of 8 {{.*}} %C = fadd
  ; AVX: cost of 4 {{.*}} %C = fadd
  ; AVX2: cost of 4 {{.*}} %C = fadd
  ; AVX512: cost of 2 {{.*}} %C = fadd
  %C = fadd <16 x float> undef, undef

  ; SSSE3: cost of 2 {{.*}} %D = fadd
  ; SSE42: cost of 2 {{.*}} %D = fadd
  ; AVX: cost of 2 {{.*}} %D = fadd
  ; AVX2: cost of 2 {{.*}} %D = fadd
  ; AVX512: cost of 2 {{.*}} %D = fadd
  %D = fadd <2 x double> undef, undef
  ; SSSE3: cost of 4 {{.*}} %E = fadd
  ; SSE42: cost of 4 {{.*}} %E = fadd
  ; AVX: cost of 2 {{.*}} %E = fadd
  ; AVX2: cost of 2 {{.*}} %E = fadd
  ; AVX512: cost of 2 {{.*}} %E = fadd
  %E = fadd <4 x double> undef, undef
  ; SSSE3: cost of 8 {{.*}} %F = fadd
  ; SSE42: cost of 8 {{.*}} %F = fadd
  ; AVX: cost of 4 {{.*}} %F = fadd
  ; AVX2: cost of 4 {{.*}} %F = fadd
  ; AVX512: cost of 2 {{.*}} %F = fadd
  %F = fadd <8 x double> undef, undef

  ret i32 undef
}
; Cost-model expectations for vector fsub on <4/8/16 x float> and
; <2/4/8 x double>. All operands are undef; each check below only matches
; the reported cost together with the result name and opcode.
; CHECK-LABEL: 'fsub'
define i32 @fsub(i32 %arg) {
  ; SSSE3: cost of 2 {{.*}} %A = fsub
  ; SSE42: cost of 2 {{.*}} %A = fsub
  ; AVX: cost of 2 {{.*}} %A = fsub
  ; AVX2: cost of 2 {{.*}} %A = fsub
  ; AVX512: cost of 2 {{.*}} %A = fsub
  %A = fsub <4 x float> undef, undef
  ; SSSE3: cost of 4 {{.*}} %B = fsub
  ; SSE42: cost of 4 {{.*}} %B = fsub
  ; AVX: cost of 2 {{.*}} %B = fsub
  ; AVX2: cost of 2 {{.*}} %B = fsub
  ; AVX512: cost of 2 {{.*}} %B = fsub
  %B = fsub <8 x float> undef, undef
  ; SSSE3: cost of 8 {{.*}} %C = fsub
  ; SSE42: cost of 8 {{.*}} %C = fsub
  ; AVX: cost of 4 {{.*}} %C = fsub
  ; AVX2: cost of 4 {{.*}} %C = fsub
  ; AVX512: cost of 2 {{.*}} %C = fsub
  %C = fsub <16 x float> undef, undef

  ; SSSE3: cost of 2 {{.*}} %D = fsub
  ; SSE42: cost of 2 {{.*}} %D = fsub
  ; AVX: cost of 2 {{.*}} %D = fsub
  ; AVX2: cost of 2 {{.*}} %D = fsub
  ; AVX512: cost of 2 {{.*}} %D = fsub
  %D = fsub <2 x double> undef, undef
  ; SSSE3: cost of 4 {{.*}} %E = fsub
  ; SSE42: cost of 4 {{.*}} %E = fsub
  ; AVX: cost of 2 {{.*}} %E = fsub
  ; AVX2: cost of 2 {{.*}} %E = fsub
  ; AVX512: cost of 2 {{.*}} %E = fsub
  %E = fsub <4 x double> undef, undef
  ; SSSE3: cost of 8 {{.*}} %F = fsub
  ; SSE42: cost of 8 {{.*}} %F = fsub
  ; AVX: cost of 4 {{.*}} %F = fsub
  ; AVX2: cost of 4 {{.*}} %F = fsub
  ; AVX512: cost of 2 {{.*}} %F = fsub
  %F = fsub <8 x double> undef, undef

  ret i32 undef
}
; Cost-model expectations for vector fmul on <4/8/16 x float> and
; <2/4/8 x double>. All operands are undef; each check below only matches
; the reported cost together with the result name and opcode.
; CHECK-LABEL: 'fmul'
define i32 @fmul(i32 %arg) {
  ; SSSE3: cost of 2 {{.*}} %A = fmul
  ; SSE42: cost of 2 {{.*}} %A = fmul
  ; AVX: cost of 2 {{.*}} %A = fmul
  ; AVX2: cost of 2 {{.*}} %A = fmul
  ; AVX512: cost of 2 {{.*}} %A = fmul
  %A = fmul <4 x float> undef, undef
  ; SSSE3: cost of 4 {{.*}} %B = fmul
  ; SSE42: cost of 4 {{.*}} %B = fmul
  ; AVX: cost of 2 {{.*}} %B = fmul
  ; AVX2: cost of 2 {{.*}} %B = fmul
  ; AVX512: cost of 2 {{.*}} %B = fmul
  %B = fmul <8 x float> undef, undef
  ; SSSE3: cost of 8 {{.*}} %C = fmul
  ; SSE42: cost of 8 {{.*}} %C = fmul
  ; AVX: cost of 4 {{.*}} %C = fmul
  ; AVX2: cost of 4 {{.*}} %C = fmul
  ; AVX512: cost of 2 {{.*}} %C = fmul
  %C = fmul <16 x float> undef, undef

  ; SSSE3: cost of 2 {{.*}} %D = fmul
  ; SSE42: cost of 2 {{.*}} %D = fmul
  ; AVX: cost of 2 {{.*}} %D = fmul
  ; AVX2: cost of 2 {{.*}} %D = fmul
  ; AVX512: cost of 2 {{.*}} %D = fmul
  %D = fmul <2 x double> undef, undef
  ; SSSE3: cost of 4 {{.*}} %E = fmul
  ; SSE42: cost of 4 {{.*}} %E = fmul
  ; AVX: cost of 2 {{.*}} %E = fmul
  ; AVX2: cost of 2 {{.*}} %E = fmul
  ; AVX512: cost of 2 {{.*}} %E = fmul
  %E = fmul <4 x double> undef, undef
  ; SSSE3: cost of 8 {{.*}} %F = fmul
  ; SSE42: cost of 8 {{.*}} %F = fmul
  ; AVX: cost of 4 {{.*}} %F = fmul
  ; AVX2: cost of 4 {{.*}} %F = fmul
  ; AVX512: cost of 2 {{.*}} %F = fmul
  %F = fmul <8 x double> undef, undef

  ret i32 undef
}
; Cost-model expectations for vector fdiv on <4/8/16 x float> and
; <2/4/8 x double>. All operands are undef; each check below only matches
; the reported cost together with the result name and opcode.
; CHECK-LABEL: 'fdiv'
define i32 @fdiv(i32 %arg) {
  ; SSSE3: cost of 2 {{.*}} %A = fdiv
  ; SSE42: cost of 2 {{.*}} %A = fdiv
  ; AVX: cost of 2 {{.*}} %A = fdiv
  ; AVX2: cost of 2 {{.*}} %A = fdiv
  ; AVX512: cost of 2 {{.*}} %A = fdiv
  %A = fdiv <4 x float> undef, undef
  ; SSSE3: cost of 4 {{.*}} %B = fdiv
  ; SSE42: cost of 4 {{.*}} %B = fdiv
  ; AVX: cost of 2 {{.*}} %B = fdiv
  ; AVX2: cost of 2 {{.*}} %B = fdiv
  ; AVX512: cost of 2 {{.*}} %B = fdiv
  %B = fdiv <8 x float> undef, undef
  ; SSSE3: cost of 8 {{.*}} %C = fdiv
  ; SSE42: cost of 8 {{.*}} %C = fdiv
  ; AVX: cost of 4 {{.*}} %C = fdiv
  ; AVX2: cost of 4 {{.*}} %C = fdiv
  ; AVX512: cost of 2 {{.*}} %C = fdiv
  %C = fdiv <16 x float> undef, undef

  ; SSSE3: cost of 2 {{.*}} %D = fdiv
  ; SSE42: cost of 2 {{.*}} %D = fdiv
  ; AVX: cost of 2 {{.*}} %D = fdiv
  ; AVX2: cost of 2 {{.*}} %D = fdiv
  ; AVX512: cost of 2 {{.*}} %D = fdiv
  %D = fdiv <2 x double> undef, undef
  ; SSSE3: cost of 4 {{.*}} %E = fdiv
  ; SSE42: cost of 4 {{.*}} %E = fdiv
  ; AVX: cost of 2 {{.*}} %E = fdiv
  ; AVX2: cost of 2 {{.*}} %E = fdiv
  ; AVX512: cost of 2 {{.*}} %E = fdiv
  %E = fdiv <4 x double> undef, undef
  ; SSSE3: cost of 8 {{.*}} %F = fdiv
  ; SSE42: cost of 8 {{.*}} %F = fdiv
  ; AVX: cost of 4 {{.*}} %F = fdiv
  ; AVX2: cost of 4 {{.*}} %F = fdiv
  ; AVX512: cost of 2 {{.*}} %F = fdiv
  %F = fdiv <8 x double> undef, undef

  ret i32 undef
}
; Cost-model expectations for vector frem on <4/8/16 x float> and
; <2/4/8 x double>. All operands are undef; each check below only matches
; the reported cost together with the result name and opcode.
; NOTE(review): the expected costs are far higher than for the other FP
; operations in this file, presumably because frem is expanded per element
; rather than mapped to a vector instruction - confirm against the X86 cost
; model before changing any value.
; CHECK-LABEL: 'frem'
define i32 @frem(i32 %arg) {
  ; SSSE3: cost of 14 {{.*}} %A = frem
  ; SSE42: cost of 14 {{.*}} %A = frem
  ; AVX: cost of 14 {{.*}} %A = frem
  ; AVX2: cost of 14 {{.*}} %A = frem
  ; AVX512: cost of 14 {{.*}} %A = frem
  %A = frem <4 x float> undef, undef
  ; SSSE3: cost of 28 {{.*}} %B = frem
  ; SSE42: cost of 28 {{.*}} %B = frem
  ; AVX: cost of 30 {{.*}} %B = frem
  ; AVX2: cost of 30 {{.*}} %B = frem
  ; AVX512: cost of 30 {{.*}} %B = frem
  %B = frem <8 x float> undef, undef
  ; SSSE3: cost of 56 {{.*}} %C = frem
  ; SSE42: cost of 56 {{.*}} %C = frem
  ; AVX: cost of 60 {{.*}} %C = frem
  ; AVX2: cost of 60 {{.*}} %C = frem
  ; AVX512: cost of 62 {{.*}} %C = frem
  %C = frem <16 x float> undef, undef

  ; SSSE3: cost of 6 {{.*}} %D = frem
  ; SSE42: cost of 6 {{.*}} %D = frem
  ; AVX: cost of 6 {{.*}} %D = frem
  ; AVX2: cost of 6 {{.*}} %D = frem
  ; AVX512: cost of 6 {{.*}} %D = frem
  %D = frem <2 x double> undef, undef
  ; SSSE3: cost of 12 {{.*}} %E = frem
  ; SSE42: cost of 12 {{.*}} %E = frem
  ; AVX: cost of 14 {{.*}} %E = frem
  ; AVX2: cost of 14 {{.*}} %E = frem
  ; AVX512: cost of 14 {{.*}} %E = frem
  %E = frem <4 x double> undef, undef
  ; SSSE3: cost of 24 {{.*}} %F = frem
  ; SSE42: cost of 24 {{.*}} %F = frem
  ; AVX: cost of 28 {{.*}} %F = frem
  ; AVX2: cost of 28 {{.*}} %F = frem
  ; AVX512: cost of 30 {{.*}} %F = frem
  %F = frem <8 x double> undef, undef

  ret i32 undef
}