blob: 7c6df410706e7476411c55dd6447ea424f1675ec [file] [log] [blame]
Bob Wilsone8a549c2012-09-29 21:43:49 +00001//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the itinerary class data for the Swift processor.
11//
12//===----------------------------------------------------------------------===//
13
14// ===---------------------------------------------------------------------===//
15// This section contains legacy support for itineraries. This is
16// required until SD and PostRA schedulers are replaced by MachineScheduler.
17
18def SW_DIS0 : FuncUnit; // SW_DIS0-2: units claimed by the leading zero-advance stage(s) of the itineraries below (presumably dispatch slots -- TODO confirm)
19def SW_DIS1 : FuncUnit;
20def SW_DIS2 : FuncUnit;
21
22def SW_ALU0 : FuncUnit; // ALU unit 0: the only ALU listed for the integer multiply and FP/NEON ALU itineraries below
23def SW_ALU1 : FuncUnit; // ALU unit 1: the only ALU listed for the FP convert, FP multiply/MAC and NEON multiply itineraries below
24def SW_LS : FuncUnit; // unit used by the load/store itineraries below
25def SW_IDIV : FuncUnit; // occupied for 14 cycles by IIC_iDIV
26def SW_FDIV : FuncUnit; // occupied for 15 (32-bit) or 30 (64-bit) cycles by the FP div/sqrt itineraries
27
28// FIXME: Need bypasses.
29// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
30// IIC_iMOVix2ld better.
31// FIXME: Model the special immediate shifts that are not microcoded.
32// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
33// to issue on pipe 1?
34// FIXME: Model the pipelined behavior of CMP / TST instructions.
35// FIXME: Better model the microcode stages of multiply instructions, especially
36// conditional variants.
37// FIXME: Add preload instruction when it is documented.
38// FIXME: Model non-pipelined nature of FP div / sqrt unit.
39
40def SwiftItineraries : ProcessorItineraries<
41 [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
42 //
43 // Move instructions, unconditional
44 InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
45 InstrStage<1, [SW_ALU0, SW_ALU1]>],
46 [1]>,
47 InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
48 InstrStage<1, [SW_ALU0, SW_ALU1]>],
49 [1]>,
50 InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
51 InstrStage<1, [SW_ALU0, SW_ALU1]>],
52 [1]>,
53 InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
54 InstrStage<1, [SW_ALU0, SW_ALU1]>],
55 [1]>,
56 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
57 InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
58 InstrStage<1, [SW_ALU0, SW_ALU1]>,
59 InstrStage<1, [SW_ALU0, SW_ALU1]>],
60 [2]>,
61 InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
62 InstrStage<1, [SW_ALU0, SW_ALU1]>,
63 InstrStage<1, [SW_ALU0, SW_ALU1]>,
64 InstrStage<1, [SW_ALU0, SW_ALU1]>],
65 [3]>,
66 InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
67 InstrStage<1, [SW_ALU0, SW_ALU1]>,
68 InstrStage<1, [SW_ALU0, SW_ALU1]>,
69 InstrStage<1, [SW_LS]>],
70 [5]>,
71 //
72 // MVN instructions
73 InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
74 InstrStage<1, [SW_ALU0, SW_ALU1]>],
75 [1]>,
76 InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
77 InstrStage<1, [SW_ALU0, SW_ALU1]>],
78 [1]>,
79 InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
80 InstrStage<1, [SW_ALU0, SW_ALU1]>],
81 [1]>,
82 InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
83 InstrStage<1, [SW_ALU0, SW_ALU1]>],
84 [1]>,
85 //
86 // No operand cycles
87 InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
88 InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
89 //
90 // Binary Instructions that produce a result
91 InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
92 InstrStage<1, [SW_ALU0, SW_ALU1]>],
93 [1, 1]>,
94 InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
95 InstrStage<1, [SW_ALU0, SW_ALU1]>],
96 [1, 1, 1]>,
97 InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
98 InstrStage<1, [SW_ALU0, SW_ALU1]>],
99 [2, 1, 1]>,
100 InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
101 InstrStage<1, [SW_ALU0, SW_ALU1]>],
102 [2, 1, 1]>,
103 InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
104 InstrStage<1, [SW_ALU0, SW_ALU1]>],
105 [2, 1, 1, 1]>,
106 //
107 // Bitwise Instructions that produce a result
108 InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
109 InstrStage<1, [SW_ALU0, SW_ALU1]>],
110 [1, 1]>,
111 InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
112 InstrStage<1, [SW_ALU0, SW_ALU1]>],
113 [1, 1, 1]>,
114 InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
115 InstrStage<1, [SW_ALU0, SW_ALU1]>],
116 [2, 1, 1]>,
117 InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
118 InstrStage<1, [SW_ALU0, SW_ALU1]>],
119 [2, 1, 1, 1]>,
120 //
121 // Unary Instructions that produce a result
122
123 // CLZ, RBIT, etc.
124 InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
125 InstrStage<1, [SW_ALU0, SW_ALU1]>],
126 [1, 1]>,
127
128 // BFC, BFI, UBFX, SBFX
129 InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
130 InstrStage<1, [SW_ALU0, SW_ALU1]>],
131 [2, 1]>,
132
133 //
134 // Zero and sign extension instructions
135 InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
136 InstrStage<1, [SW_ALU0, SW_ALU1]>],
137 [1, 1]>,
138 InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
139 InstrStage<1, [SW_ALU0, SW_ALU1]>],
140 [1, 1, 1]>,
141 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
142 InstrStage<1, [SW_ALU0, SW_ALU1]>],
143 [1, 1, 1, 1]>,
144 //
145 // Compare instructions
146 InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
147 InstrStage<1, [SW_ALU0, SW_ALU1]>],
148 [1]>,
149 InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
150 InstrStage<1, [SW_ALU0, SW_ALU1]>],
151 [1, 1]>,
152 InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
153 InstrStage<2, [SW_ALU0, SW_ALU1]>],
154 [1, 1]>,
155 InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
156 InstrStage<2, [SW_ALU0, SW_ALU1]>],
157 [1, 1, 1]>,
158 //
159 // Test instructions
160 InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
161 InstrStage<1, [SW_ALU0, SW_ALU1]>],
162 [1]>,
163 InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
164 InstrStage<1, [SW_ALU0, SW_ALU1]>],
165 [1, 1]>,
166 InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
167 InstrStage<2, [SW_ALU0, SW_ALU1]>],
168 [1, 1]>,
169 InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
170 InstrStage<2, [SW_ALU0, SW_ALU1]>],
171 [1, 1, 1]>,
172 //
173 // Move instructions, conditional
174 // FIXME: Correctly model the extra input dep on the destination.
175 InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
176 InstrStage<1, [SW_ALU0, SW_ALU1]>],
177 [1]>,
178 InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
179 InstrStage<1, [SW_ALU0, SW_ALU1]>],
180 [1, 1]>,
181 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
182 InstrStage<1, [SW_ALU0, SW_ALU1]>],
183 [1, 1]>,
184 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
185 InstrStage<1, [SW_ALU0, SW_ALU1]>],
186 [2, 1, 1]>,
187 InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
188 InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
189 InstrStage<1, [SW_ALU0, SW_ALU1]>,
190 InstrStage<1, [SW_ALU0, SW_ALU1]>],
191 [2]>,
192
193 // Integer multiply pipeline
194 //
195 InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
196 InstrStage<1, [SW_ALU0]>],
197 [3, 1, 1]>,
198 InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
199 InstrStage<1, [SW_ALU0]>],
200 [3, 1, 1, 1]>,
201 InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
202 InstrStage<1, [SW_ALU0]>],
203 [4, 1, 1]>,
204 InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
205 InstrStage<1, [SW_ALU0]>],
206 [4, 1, 1, 1]>,
207 InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>,
208 InstrStage<1, [SW_DIS1], 0>,
209 InstrStage<1, [SW_DIS2], 0>,
210 InstrStage<1, [SW_ALU0], 1>,
211 InstrStage<1, [SW_ALU0], 3>,
212 InstrStage<1, [SW_ALU0]>],
213 [5, 5, 1, 1]>,
214 InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>,
215 InstrStage<1, [SW_DIS1], 0>,
216 InstrStage<1, [SW_DIS2], 0>,
217 InstrStage<1, [SW_ALU0], 1>,
218 InstrStage<1, [SW_ALU0], 1>,
219 InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
220 InstrStage<1, [SW_ALU0, SW_ALU1]>],
221 [5, 6, 1, 1]>,
222 //
223 // Integer divide
224 InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
225 InstrStage<1, [SW_ALU0], 0>,
226 InstrStage<14, [SW_IDIV]>],
227 [14, 1, 1]>,
228
229 // Integer load pipeline
230 // FIXME: The timings are some rough approximations
231 //
232 // Immediate offset
233 InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
234 InstrStage<1, [SW_LS]>],
235 [3, 1]>,
236 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
237 InstrStage<1, [SW_LS]>],
238 [3, 1]>,
239 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
240 InstrStage<1, [SW_DIS1], 0>,
241 InstrStage<1, [SW_LS], 1>,
242 InstrStage<1, [SW_LS]>],
243 [3, 4, 1]>,
244 //
245 // Register offset
246 InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
247 InstrStage<1, [SW_LS]>],
248 [3, 1, 1]>,
249 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
250 InstrStage<1, [SW_LS]>],
251 [3, 1, 1]>,
252 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
253 InstrStage<1, [SW_DIS1], 0>,
254 InstrStage<1, [SW_DIS2], 0>,
255 InstrStage<1, [SW_LS], 1>,
256 InstrStage<1, [SW_LS], 3>,
257 InstrStage<1, [SW_ALU0, SW_ALU1]>],
258 [3, 4, 1, 1]>,
259 //
260 // Scaled register offset
261 InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>,
262 InstrStage<1, [SW_DIS1], 0>,
263 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
264 InstrStage<1, [SW_LS]>],
265 [5, 1, 1]>,
266 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
267 InstrStage<1, [SW_DIS1], 0>,
268 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
269 InstrStage<1, [SW_LS]>],
270 [5, 1, 1]>,
271 //
272 // Immediate offset with update
273 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>,
274 InstrStage<1, [SW_DIS1], 0>,
275 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
276 InstrStage<1, [SW_LS]>],
277 [3, 1, 1]>,
278 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
279 InstrStage<1, [SW_DIS1], 0>,
280 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
281 InstrStage<1, [SW_LS]>],
282 [3, 1, 1]>,
283 //
284 // Register offset with update
285 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>,
286 InstrStage<1, [SW_DIS1], 0>,
287 InstrStage<1, [SW_ALU0], 1>,
288 InstrStage<1, [SW_LS]>],
289 [3, 1, 1, 1]>,
290 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
291 InstrStage<1, [SW_DIS1], 0>,
292 InstrStage<1, [SW_ALU0], 1>,
293 InstrStage<1, [SW_LS]>],
294 [3, 1, 1, 1]>,
295 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
296 InstrStage<1, [SW_DIS1], 0>,
297 InstrStage<1, [SW_DIS2], 0>,
298 InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
299 InstrStage<1, [SW_LS], 3>,
300 InstrStage<1, [SW_LS], 0>,
301 InstrStage<1, [SW_ALU0, SW_ALU1]>],
302 [3, 4, 1, 1]>,
303 //
304 // Scaled register offset with update
305 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
306 InstrStage<1, [SW_DIS1], 0>,
307 InstrStage<1, [SW_DIS2], 0>,
308 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
309 InstrStage<1, [SW_LS], 3>,
310 InstrStage<1, [SW_ALU0, SW_ALU1]>],
311 [5, 3, 1, 1]>,
312 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
313 InstrStage<1, [SW_DIS1], 0>,
314 InstrStage<1, [SW_DIS2], 0>,
315 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
316 InstrStage<1, [SW_LS], 0>,
317 InstrStage<1, [SW_ALU0, SW_ALU1]>],
318 [5, 3, 1, 1]>,
319 //
320 // Load multiple, def is the 5th operand.
321 // FIXME: This assumes 3 to 4 registers.
322 InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>,
323 InstrStage<1, [SW_DIS1], 0>,
324 InstrStage<1, [SW_DIS2], 0>,
325 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
326 InstrStage<1, [SW_LS]>],
327 [1, 1, 1, 1, 3], [], -1>, // dynamic uops
328
329 //
330 // Load multiple + update, defs are the 1st and 5th operands.
331 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
332 InstrStage<1, [SW_DIS1], 0>,
333 InstrStage<1, [SW_DIS2], 0>,
334 InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
335 InstrStage<1, [SW_LS], 3>,
336 InstrStage<1, [SW_ALU0, SW_ALU1]>],
337 [2, 1, 1, 1, 3], [], -1>, // dynamic uops
338 //
339 // Load multiple plus branch
340 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
341 InstrStage<1, [SW_DIS1], 0>,
342 InstrStage<1, [SW_DIS2], 0>,
343 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
344 InstrStage<1, [SW_LS]>],
345 [1, 1, 1, 1, 3], [], -1>, // dynamic uops
346 //
347 // Pop, def is the 3rd operand.
348 InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>,
349 InstrStage<1, [SW_DIS1], 0>,
350 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
351 InstrStage<1, [SW_LS]>],
352 [1, 1, 3], [], -1>, // dynamic uops
353 //
354 // Pop + branch, def is the 3rd operand.
355 InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>,
356 InstrStage<1, [SW_DIS1], 0>,
357 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
358 InstrStage<1, [SW_LS]>],
359 [1, 1, 3], [], -1>, // dynamic uops
360
361 //
362 // iLoadi + iALUr for t2LDRpci_pic.
363 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
364 InstrStage<1, [SW_LS], 3>,
365 InstrStage<1, [SW_ALU0, SW_ALU1]>],
366 [4, 1]>,
367
368 // Integer store pipeline
369 //
370 // Immediate offset
371 InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
372 InstrStage<1, [SW_LS]>],
373 [1, 1]>,
374 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
375 InstrStage<1, [SW_LS]>],
376 [1, 1]>,
377 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
378 InstrStage<1, [SW_DIS1], 0>,
379 InstrStage<1, [SW_DIS2], 0>,
380 InstrStage<1, [SW_LS], 0>,
381 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
382 InstrStage<1, [SW_LS]>],
383 [1, 1]>,
384 //
385 // Register offset
386 InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
387 InstrStage<1, [SW_LS]>],
388 [1, 1, 1]>,
389 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
390 InstrStage<1, [SW_LS]>],
391 [1, 1, 1]>,
392 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
393 InstrStage<1, [SW_DIS1], 0>,
394 InstrStage<1, [SW_DIS2], 0>,
395 InstrStage<1, [SW_LS], 0>,
396 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
397 InstrStage<1, [SW_LS]>],
398 [1, 1, 1]>,
399 //
400 // Scaled register offset
401 InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>,
402 InstrStage<1, [SW_DIS1], 0>,
403 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
404 InstrStage<1, [SW_LS]>],
405 [1, 1, 1]>,
406 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
407 InstrStage<1, [SW_DIS1], 0>,
408 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
409 InstrStage<1, [SW_LS]>],
410 [1, 1, 1]>,
411 //
412 // Immediate offset with update
413 InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>,
414 InstrStage<1, [SW_DIS1], 0>,
415 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
416 InstrStage<1, [SW_LS]>],
417 [1, 1, 1]>,
418 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
419 InstrStage<1, [SW_DIS1], 0>,
420 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
421 InstrStage<1, [SW_LS]>],
422 [1, 1, 1]>,
423 //
424 // Register offset with update
425 InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>,
426 InstrStage<1, [SW_DIS1], 0>,
427 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
428 InstrStage<1, [SW_LS]>],
429 [1, 1, 1, 1]>,
430 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
431 InstrStage<1, [SW_DIS1], 0>,
432 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
433 InstrStage<1, [SW_LS]>],
434 [1, 1, 1, 1]>,
435 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
436 InstrStage<1, [SW_DIS1], 0>,
437 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
438 InstrStage<1, [SW_LS]>],
439 [1, 1, 1, 1]>,
440 //
441 // Scaled register offset with update
442 InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>,
443 InstrStage<1, [SW_DIS1], 0>,
444 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
445 InstrStage<1, [SW_LS], 0>,
446 InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
447 [3, 1, 1, 1]>,
448 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
449 InstrStage<1, [SW_DIS1], 0>,
450 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
451 InstrStage<1, [SW_LS], 0>,
452 InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
453 [3, 1, 1, 1]>,
454 //
455 // Store multiple
456 InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
457 InstrStage<1, [SW_DIS1], 0>,
458 InstrStage<1, [SW_DIS2], 0>,
459 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
460 InstrStage<1, [SW_LS], 1>,
461 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
462 InstrStage<1, [SW_LS], 1>,
463 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
464 InstrStage<1, [SW_LS]>],
465 [], [], -1>, // dynamic uops
466 //
467 // Store multiple + update
468 InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
469 InstrStage<1, [SW_DIS1], 0>,
470 InstrStage<1, [SW_DIS2], 0>,
471 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
472 InstrStage<1, [SW_LS], 1>,
473 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
474 InstrStage<1, [SW_LS], 1>,
475 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
476 InstrStage<1, [SW_LS]>],
477 [2], [], -1>, // dynamic uops
478
479 //
480 // Preload
481 InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
482
483 // Branch
484 //
485 // no delay slots, so the latency of a branch is unimportant
486 InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>,
487
488 // FP Special Register to Integer Register File Move
489 InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
490 InstrStage<1, [SW_ALU0, SW_ALU1]>],
491 [1]>,
492 //
493 // Single-precision FP Unary
494 //
495 // Most floating-point moves get issued on ALU0.
496 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
497 InstrStage<1, [SW_ALU0]>],
498 [2, 1]>,
499 //
500 // Double-precision FP Unary
501 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
502 InstrStage<1, [SW_ALU0]>],
503 [2, 1]>,
504
505 //
506 // Single-precision FP Compare
507 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
508 InstrStage<1, [SW_ALU0]>],
509 [1, 1]>,
510 //
511 // Double-precision FP Compare
512 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
513 InstrStage<1, [SW_ALU0]>],
514 [1, 1]>,
515 //
516 // Single to Double FP Convert
517 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
518 InstrStage<1, [SW_ALU1]>],
519 [4, 1]>,
520 //
521 // Double to Single FP Convert
522 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
523 InstrStage<1, [SW_ALU1]>],
524 [4, 1]>,
525
526 //
527 // Single to Half FP Convert
528 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
529 InstrStage<1, [SW_DIS1], 0>,
530 InstrStage<1, [SW_ALU1], 4>,
531 InstrStage<1, [SW_ALU1]>],
532 [6, 1]>,
533 //
534 // Half to Single FP Convert
535 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
536 InstrStage<1, [SW_ALU1]>],
537 [4, 1]>,
538
539 //
540 // Single-Precision FP to Integer Convert
541 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
542 InstrStage<1, [SW_ALU1]>],
543 [4, 1]>,
544 //
545 // Double-Precision FP to Integer Convert
546 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
547 InstrStage<1, [SW_ALU1]>],
548 [4, 1]>,
549 //
550 // Integer to Single-Precision FP Convert
551 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
552 InstrStage<1, [SW_ALU1]>],
553 [4, 1]>,
554 //
555 // Integer to Double-Precision FP Convert
556 InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
557 InstrStage<1, [SW_ALU1]>],
558 [4, 1]>,
559 //
560 // Single-precision FP ALU
561 InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
562 InstrStage<1, [SW_ALU0]>],
563 [2, 1, 1]>,
564 //
565 // Double-precision FP ALU
566 InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
567 InstrStage<1, [SW_ALU0]>],
568 [2, 1, 1]>,
569 //
570 // Single-precision FP Multiply
571 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
572 InstrStage<1, [SW_ALU1]>],
573 [4, 1, 1]>,
574 //
575 // Double-precision FP Multiply
576 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
577 InstrStage<1, [SW_ALU1]>],
578 [6, 1, 1]>,
579 //
580 // Single-precision FP MAC
581 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
582 InstrStage<1, [SW_ALU1]>],
583 [8, 1, 1]>,
584 //
585 // Double-precision FP MAC
586 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
587 InstrStage<1, [SW_ALU1]>],
588 [12, 1, 1]>,
589 //
590 // Single-precision Fused FP MAC
591 InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
592 InstrStage<1, [SW_ALU1]>],
593 [8, 1, 1]>,
594 //
595 // Double-precision Fused FP MAC
596 InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
597 InstrStage<1, [SW_ALU1]>],
598 [12, 1, 1]>,
599 //
600 // Single-precision FP DIV
601 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
602 InstrStage<1, [SW_ALU1], 0>,
603 InstrStage<15, [SW_FDIV]>],
604 [17, 1, 1]>,
605 //
606 // Double-precision FP DIV
607 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
608 InstrStage<1, [SW_ALU1], 0>,
609 InstrStage<30, [SW_FDIV]>],
610 [32, 1, 1]>,
611 //
612 // Single-precision FP SQRT
613 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
614 InstrStage<1, [SW_ALU1], 0>,
615 InstrStage<15, [SW_FDIV]>],
616 [17, 1]>,
617 //
618 // Double-precision FP SQRT
619 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
620 InstrStage<1, [SW_ALU1], 0>,
621 InstrStage<30, [SW_FDIV]>],
622 [32, 1, 1]>,
623
624 //
625 // Integer to Single-precision Move
626 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>,
627 InstrStage<1, [SW_DIS1], 0>,
628 InstrStage<1, [SW_LS], 4>,
629 InstrStage<1, [SW_ALU0]>],
630 [6, 1]>,
631 //
632 // Integer to Double-precision Move
633 InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
634 InstrStage<1, [SW_LS]>],
635 [4, 1]>,
636 //
637 // Single-precision to Integer Move
638 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
639 InstrStage<1, [SW_LS]>],
640 [3, 1]>,
641 //
642 // Double-precision to Integer Move
643 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>,
644 InstrStage<1, [SW_DIS1], 0>,
645 InstrStage<1, [SW_LS], 3>,
646 InstrStage<1, [SW_LS]>],
647 [3, 4, 1]>,
648 //
649 // Single-precision FP Load
650 InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
651 InstrStage<1, [SW_LS]>],
652 [4, 1]>,
653 //
654 // Double-precision FP Load
655 InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
656 InstrStage<1, [SW_LS]>],
657 [4, 1]>,
658 //
659 // FP Load Multiple
660 // FIXME: Assumes a single Q register.
661 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
662 InstrStage<1, [SW_LS]>],
663 [1, 1, 1, 4], [], -1>, // dynamic uops
664 //
665 // FP Load Multiple + update
666 // FIXME: Assumes a single Q register.
667 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
668 InstrStage<1, [SW_DIS1], 0>,
669 InstrStage<1, [SW_LS], 4>,
670 InstrStage<1, [SW_ALU0, SW_ALU1]>],
671 [2, 1, 1, 1, 4], [], -1>, // dynamic uops
672 //
673 // Single-precision FP Store
674 InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
675 InstrStage<1, [SW_LS]>],
676 [1, 1]>,
677 //
678 // Double-precision FP Store
679 InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
680 InstrStage<1, [SW_LS]>],
681 [1, 1]>,
682 //
683 // FP Store Multiple
684 // FIXME: Assumes a single Q register.
685 InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
686 InstrStage<1, [SW_LS]>],
687 [1, 1, 1], [], -1>, // dynamic uops
688 //
689 // FP Store Multiple + update
690 // FIXME: Assumes a single Q register.
691 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
692 InstrStage<1, [SW_DIS1], 0>,
693 InstrStage<1, [SW_LS], 4>,
694 InstrStage<1, [SW_ALU0, SW_ALU1]>],
695 [2, 1, 1, 1], [], -1>, // dynamic uops
696 // NEON
697 //
698 // Double-register Integer Unary
699 InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
700 InstrStage<1, [SW_ALU0]>],
701 [4, 1]>,
702 //
703 // Quad-register Integer Unary
704 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
705 InstrStage<1, [SW_ALU0]>],
706 [4, 1]>,
707 //
708 // Double-register Integer Q-Unary
709 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
710 InstrStage<1, [SW_ALU0]>],
711 [4, 1]>,
712 //
713 // Quad-register Integer Q-Unary
714 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
715 InstrStage<1, [SW_ALU0]>],
716 [4, 1]>,
717 //
718 // Double-register Integer Binary
719 InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
720 InstrStage<1, [SW_ALU0]>],
721 [2, 1, 1]>,
722 //
723 // Quad-register Integer Binary
724 InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
725 InstrStage<1, [SW_ALU0]>],
726 [2, 1, 1]>,
727 //
728 // Double-register Integer Subtract
729 InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
730 InstrStage<1, [SW_ALU0]>],
731 [2, 1, 1]>,
732 //
733 // Quad-register Integer Subtract
734 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
735 InstrStage<1, [SW_ALU0]>],
736 [2, 1, 1]>,
737 //
738 // Double-register Integer Shift
739 InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
740 InstrStage<1, [SW_ALU0]>],
741 [2, 1, 1]>,
742 //
743 // Quad-register Integer Shift
744 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
745 InstrStage<1, [SW_ALU0]>],
746 [2, 1, 1]>,
747 //
748 // Double-register Integer Shift (4 cycle)
749 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
750 InstrStage<1, [SW_ALU0]>],
751 [4, 1, 1]>,
752 //
753 // Quad-register Integer Shift (4 cycle)
754 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
755 InstrStage<1, [SW_ALU0]>],
756 [4, 1, 1]>,
757 //
758 // Double-register Integer Binary (4 cycle)
759 InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
760 InstrStage<1, [SW_ALU0]>],
761 [4, 1, 1]>,
762 //
763 // Quad-register Integer Binary (4 cycle)
764 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
765 InstrStage<1, [SW_ALU0]>],
766 [4, 1, 1]>,
767 //
768 // Double-register Integer Subtract (4 cycle)
769 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
770 InstrStage<1, [SW_ALU0]>],
771 [4, 1, 1]>,
772 //
773 // Quad-register Integer Subtract (4 cycle)
774 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
775 InstrStage<1, [SW_ALU0]>],
776 [4, 1, 1]>,
777
778 //
779 // Double-register Integer Count
780 InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
781 InstrStage<1, [SW_ALU0]>],
782 [2, 1, 1]>,
783 //
784 // Quad-register Integer Count
785 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
786 InstrStage<1, [SW_ALU0]>],
787 [2, 1, 1]>,
788 //
789 // Double-register Absolute Difference and Accumulate
790 InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
791 InstrStage<1, [SW_ALU0]>],
792 [4, 1, 1, 1]>,
793 //
794 // Quad-register Absolute Difference and Accumulate
795 InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
796 InstrStage<1, [SW_ALU0]>],
797 [4, 1, 1, 1]>,
798 //
799 // Double-register Integer Pair Add Long
800 InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
801 InstrStage<1, [SW_ALU0]>],
802 [4, 1, 1]>,
803 //
804 // Quad-register Integer Pair Add Long
805 InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
806 InstrStage<1, [SW_ALU0]>],
807 [4, 1, 1]>,
808
809 //
810 // Double-register Integer Multiply (.8, .16)
811 InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
812 InstrStage<1, [SW_ALU1]>],
813 [4, 1, 1]>,
814 //
815 // Quad-register Integer Multiply (.8, .16)
816 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
817 InstrStage<1, [SW_ALU1]>],
818 [4, 1, 1]>,
819
820 //
821 // Double-register Integer Multiply (.32)
822 InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
823 InstrStage<1, [SW_ALU1]>],
824 [4, 1, 1]>,
825 //
826 // Quad-register Integer Multiply (.32)
827 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
828 InstrStage<1, [SW_ALU1]>],
829 [4, 1, 1]>,
830 //
831 // Double-register Integer Multiply-Accumulate (.8, .16)
832 InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
833 InstrStage<1, [SW_ALU1]>],
834 [4, 1, 1, 1]>,
835 //
836 // Double-register Integer Multiply-Accumulate (.32)
837 InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
838 InstrStage<1, [SW_ALU1]>],
839 [4, 1, 1, 1]>,
840 //
841 // Quad-register Integer Multiply-Accumulate (.8, .16)
842 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
843 InstrStage<1, [SW_ALU1]>],
844 [4, 1, 1, 1]>,
845 //
846 // Quad-register Integer Multiply-Accumulate (.32)
847 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
848 InstrStage<1, [SW_ALU1]>],
849 [4, 1, 1, 1]>,
850
851 //
852 // Move
853 InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
854 InstrStage<1, [SW_ALU0]>],
855 [2, 1]>,
856 //
857 // Move Immediate
858 InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
859 InstrStage<1, [SW_ALU0]>],
860 [2]>,
861 //
862 // Double-register Permute Move
863 InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
864 InstrStage<1, [SW_ALU1]>],
865 [2, 1]>,
866 //
867 // Quad-register Permute Move
868 InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
869 InstrStage<1, [SW_ALU1]>],
870 [2, 1]>,
871 //
872 // Integer to Single-precision Move
873 InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>,
874 InstrStage<1, [SW_DIS1], 0>,
875 InstrStage<1, [SW_LS], 4>,
876 InstrStage<1, [SW_ALU0]>],
877 [6, 1]>,
878 //
879 // Integer to Double-precision Move
880 InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
881 InstrStage<1, [SW_LS]>],
882 [4, 1, 1]>,
883 //
884 // Single-precision to Integer Move
885 InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
886 InstrStage<1, [SW_LS]>],
887 [3, 1]>,
888 //
889 // Double-precision to Integer Move
890 InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>,
891 InstrStage<1, [SW_DIS1], 0>,
892 InstrStage<1, [SW_LS], 3>,
893 InstrStage<1, [SW_LS]>],
894 [3, 4, 1]>,
895 //
896 // Integer to Lane Move
897 // FIXME: I think this is correct, but it is not clear from the tuning guide.
898 InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
899 InstrStage<1, [SW_DIS1], 0>,
900 InstrStage<1, [SW_LS], 4>,
901 InstrStage<1, [SW_ALU0]>],
902 [6, 1]>,
903
904 //
905 // Vector narrow move
906 InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
907 InstrStage<1, [SW_ALU1]>],
908 [2, 1]>,
909 //
910 // Double-register FP Unary
911 // FIXME: VRECPE / VRSQRTE have a longer latency than VABS, which is used here,
912 // and they issue on a different pipeline.
913 InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
914 InstrStage<1, [SW_ALU0]>],
915 [2, 1]>,
916 //
917 // Quad-register FP Unary
918 // FIXME: VRECPE / VRSQRTE have a longer latency than VABS, which is used here,
919 // and they issue on a different pipeline.
920 InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
921 InstrStage<1, [SW_ALU0]>],
922 [2, 1]>,
923 //
924 // Double-register FP Binary
925 // FIXME: We're using this itin for many instructions.
926 InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
927 InstrStage<1, [SW_ALU0]>],
928 [4, 1, 1]>,
929
930 //
931 // VPADD, etc.
932 InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
933 InstrStage<1, [SW_ALU0]>],
934 [4, 1, 1]>,
935 //
936 // Double-register FP VMUL
937 InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
938 InstrStage<1, [SW_ALU1]>],
939 [4, 1, 1]>,
940 //
941 // Quad-register FP Binary
942 InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
943 InstrStage<1, [SW_ALU0]>],
944 [4, 1, 1]>,
945 //
946 // Quad-register FP VMUL
947 InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
948 InstrStage<1, [SW_ALU1]>],
949 [4, 1, 1]>,
950 //
951 // Double-register FP Multiply-Accumulate
952 InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
953 InstrStage<1, [SW_ALU1]>],
954 [8, 1, 1]>,
955 //
956 // Quad-register FP Multiply-Accumulate
957 InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
958 InstrStage<1, [SW_ALU1]>],
959 [8, 1, 1]>,
960 //
961 // Double-register Fused FP Multiply-Accumulate
962 InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
963 InstrStage<1, [SW_ALU1]>],
964 [8, 1, 1]>,
965 //
966 // Quad-register Fused FP Multiply-Accumulate
967 InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
968 InstrStage<1, [SW_ALU1]>],
969 [8, 1, 1]>,
970 //
971 // Double-register Reciprocal Step
972 InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
973 InstrStage<1, [SW_ALU1]>],
974 [8, 1, 1]>,
975 //
976 // Quad-register Reciprocal Step
977 InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
978 InstrStage<1, [SW_ALU1]>],
979 [8, 1, 1]>,
980 //
981 // Double-register Permute
982 // FIXME: The latencies are unclear from the documentation.
983 InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>,
984 InstrStage<1, [SW_DIS1], 0>,
985 InstrStage<1, [SW_DIS2], 0>,
986 InstrStage<1, [SW_ALU1], 2>,
987 InstrStage<1, [SW_ALU1], 2>,
988 InstrStage<1, [SW_ALU1]>],
989 [3, 4, 3, 4]>,
990 //
991 // Quad-register Permute
992 // FIXME: The latencies are unclear from the documentation.
993 InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>,
994 InstrStage<1, [SW_DIS1], 0>,
995 InstrStage<1, [SW_DIS2], 0>,
996 InstrStage<1, [SW_ALU1], 2>,
997 InstrStage<1, [SW_ALU1], 2>,
998 InstrStage<1, [SW_ALU1]>],
999 [3, 4, 3, 4]>,
1000 //
1001 // Quad-register Permute (3 cycle issue on A9)
1002 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>,
1003 InstrStage<1, [SW_DIS1], 0>,
1004 InstrStage<1, [SW_DIS2], 0>,
1005 InstrStage<1, [SW_ALU1], 2>,
1006 InstrStage<1, [SW_ALU1], 2>,
1007 InstrStage<1, [SW_ALU1]>],
1008 [3, 4, 3, 4]>,
1009
1010 //
1011 // Double-register VEXT
1012 InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1013 InstrStage<1, [SW_ALU1]>],
1014 [2, 1, 1]>,
1015 //
1016 // Quad-register VEXT
1017 InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1018 InstrStage<1, [SW_ALU1]>],
1019 [2, 1, 1]>,
1020 //
1021 // VTB
1022 InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1023 InstrStage<1, [SW_ALU1]>],
1024 [2, 1, 1]>,
1025 InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>,
1026 InstrStage<1, [SW_DIS1], 0>,
1027 InstrStage<1, [SW_ALU1], 2>,
1028 InstrStage<1, [SW_ALU1]>],
1029 [4, 1, 3, 3]>,
1030 InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>,
1031 InstrStage<1, [SW_DIS1], 0>,
1032 InstrStage<1, [SW_DIS2], 0>,
1033 InstrStage<1, [SW_ALU1], 2>,
1034 InstrStage<1, [SW_ALU1], 2>,
1035 InstrStage<1, [SW_ALU1]>],
1036 [6, 1, 3, 5, 5]>,
1037 InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>,
1038 InstrStage<1, [SW_DIS1], 0>,
1039 InstrStage<1, [SW_DIS2], 0>,
1040 InstrStage<1, [SW_ALU1], 2>,
1041 InstrStage<1, [SW_ALU1], 2>,
1042 InstrStage<1, [SW_ALU1], 2>,
1043 InstrStage<1, [SW_ALU1]>],
1044 [8, 1, 3, 5, 7, 7]>,
1045 //
1046 // VTBX
1047 InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1048 InstrStage<1, [SW_ALU1]>],
1049 [2, 1, 1]>,
1050 InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>,
1051 InstrStage<1, [SW_DIS1], 0>,
1052 InstrStage<1, [SW_ALU1], 2>,
1053 InstrStage<1, [SW_ALU1]>],
1054 [4, 1, 3, 3]>,
1055 InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>,
1056 InstrStage<1, [SW_DIS1], 0>,
1057 InstrStage<1, [SW_DIS2], 0>,
1058 InstrStage<1, [SW_ALU1], 2>,
1059 InstrStage<1, [SW_ALU1], 2>,
1060 InstrStage<1, [SW_ALU1]>],
1061 [6, 1, 3, 5, 5]>,
1062 InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>,
1063 InstrStage<1, [SW_DIS1], 0>,
1064 InstrStage<1, [SW_DIS2], 0>,
1065 InstrStage<1, [SW_ALU1], 2>,
1066 InstrStage<1, [SW_ALU1], 2>,
1067 InstrStage<1, [SW_ALU1], 2>,
1068 InstrStage<1, [SW_ALU1]>],
1069 [8, 1, 3, 5, 7, 7]>
1070]>;
1071
1072// ===---------------------------------------------------------------------===//
1073// The following definitions describe the simple machine model which
1074// will replace itineraries.
1075
1076// Swift machine model for scheduling and other instruction cost heuristics.
// Records the dispatch width, minimum latency, load latency and branch
// mispredict penalty for Swift, and attaches the legacy SwiftItineraries
// table so that itinerary-based schedulers can still be driven from this
// model.
1077def SwiftModel : SchedMachineModel {
1078 let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
1079 let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
1080 let LoadLatency = 3; // NOTE(review): presumably in cycles -- confirm against SchedMachineModel.
Arnold Schwaighofer6793aeb2013-04-01 13:07:05 +00001081 let MispredictPenalty = 14; // A branch direction mispredict.
Bob Wilsone8a549c2012-09-29 21:43:49 +00001082
  // Keep the itinerary data defined earlier in this file reachable from the
  // machine model.
1083 let Itineraries = SwiftItineraries;
1084}
1085
Arnold Schwaighofer5dde1f32013-04-05 04:42:00 +00001086// Swift predicates.
// IsFastImmShiftSwiftPred evaluates TII->isSwiftFastImmShift(MI) on the
// instruction being scheduled. SwiftWriteALUsi uses it to select the two-cycle
// P01 write for shift-by-immediate ALU operations.
// NOTE(review): the accepted shift forms are defined by isSwiftFastImmShift,
// which is not visible here; the comment at the use site lists
// lsl #2, lsl #1, or lsr #1.
1087def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
1088
Arnold Schwaighofer6793aeb2013-04-01 13:07:05 +00001089// Swift resource mapping.
// Everything inside this let-scope is attached to SwiftModel: the processor
// resources, the Swift-specific write/read resource definitions, and the
// bindings of the generic ARM SchedWrite/SchedRead types onto them.
1090let SchedModel = SwiftModel in {
1091 // Processor resources.
  // SwiftUnitP0 and SwiftUnitP1 each name SwiftUnitP01 as their Super, so they
  // are declared as members of the two-unit P01 group.
1092 def SwiftUnitP01 : ProcResource<2>; // ALU unit.
1093 def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
1094 def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
1095 def SwiftUnitP2 : ProcResource<1>; // LS unit.
  // NOTE(review): presumably the divide unit, going by the name -- nothing in
  // this block uses it yet.
1096 def SwiftUnitDiv : ProcResource<1>;
1097
Arnold Schwaighofer5dde1f32013-04-05 04:42:00 +00001098 // Generic resource requirements.
  // One P01 unit, result available with latency 2.
1099 def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
  // Two micro-ops, each listed against SwiftUnitP01, with an overall latency
  // of 3.
1100 def SwiftWriteP01ThreeCycleTwoUops :
1101 SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]> {
1102 let Latency = 3;
1103 let NumMicroOps = 2;
1104 }
  // Three micro-ops on SwiftUnitP0 only; ResourceCycles = [3] keeps that unit
  // occupied for 3 cycles, and the latency is 3.
1105 def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
1106 let Latency = 3;
1107 let NumMicroOps = 3;
1108 let ResourceCycles = [3];
1109 }
1110
Arnold Schwaighofer6793aeb2013-04-01 13:07:05 +00001111 // 4.2.4 Arithmetic and Logical.
Arnold Schwaighofer5dde1f32013-04-05 04:42:00 +00001112 // ALU operation register shifted by immediate variant.
  // When the fast-immediate-shift predicate holds, use the two-cycle P01
  // write; otherwise fall back to the generic WriteALU.
1113 def SwiftWriteALUsi : SchedWriteVariant<[
1114 // lsl #2, lsl #1, or lsr #1.
1115 SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
Arnold Schwaighofer5dde1f32013-04-05 04:42:00 +00001116 SchedVar<NoSchedPred, [WriteALU]>
1117 ]>;
  // ALU operation with register-shifted-by-register operand: the predicated
  // form takes the three-cycle, two-micro-op P01 write; the unpredicated form
  // takes the two-cycle P01 write.
1118 def SwiftWriteALUsr : SchedWriteVariant<[
1119 SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
Arnold Schwaighoferfb6b9f42013-04-05 05:01:06 +00001120 SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
Arnold Schwaighofer5dde1f32013-04-05 04:42:00 +00001121 ]>;
  // Variant bound to WriteALUSsr: the predicated form takes the three-cycle,
  // three-micro-op write on P0; the unpredicated form takes the two-cycle P01
  // write.
1122 def SwiftWriteALUSsr : SchedWriteVariant<[
1123 SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
Arnold Schwaighoferfb6b9f42013-04-05 05:01:06 +00001124 SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
Arnold Schwaighofer5dde1f32013-04-05 04:42:00 +00001125 ]>;
  // Read variant bound to ReadALUsr: predicated instructions get a read
  // advance of 2; unpredicated instructions get none.
1126 def SwiftReadAdvanceALUsr : SchedReadVariant<[
1127 SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
Arnold Schwaighoferfb6b9f42013-04-05 05:01:06 +00001128 SchedVar<NoSchedPred, [NoReadAdvance]>
Arnold Schwaighofer5dde1f32013-04-05 04:42:00 +00001129 ]>;
Arnold Schwaighofer6793aeb2013-04-01 13:07:05 +00001130 // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
Arnold Schwaighoferfb6b9f42013-04-05 05:01:06 +00001131 // AND,BIC,EOR,ORN,ORR
Arnold Schwaighofer6793aeb2013-04-01 13:07:05 +00001132 // CLZ,RBIT,REV,REV16,REVSH,PKH
  // Plain ALU writes use one P01 unit with no latency or micro-op override
  // here; the shifted-operand write types are aliased to the variants defined
  // above.
Arnold Schwaighofer6793aeb2013-04-01 13:07:05 +00001133 def : WriteRes<WriteALU, [SwiftUnitP01]>;
Arnold Schwaighofer5dde1f32013-04-05 04:42:00 +00001134 def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
1135 def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
1136 def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
  // ReadALU gets a zero-cycle advance; ReadALUsr is aliased to the
  // predicate-dependent read variant defined above.
Arnold Schwaighofer6793aeb2013-04-01 13:07:05 +00001137 def : ReadAdvance<ReadALU, 0>;
Arnold Schwaighofer5dde1f32013-04-05 04:42:00 +00001138 def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
Arnold Schwaighoferfb6b9f42013-04-05 05:01:06 +00001139
1140 // 4.2.5 Integer comparison
  // All three compare write types are mapped to one P01 unit, with no
  // per-form latency or micro-op overrides in this block.
1141 def : WriteRes<WriteCMP, [SwiftUnitP01]>;
1142 def : WriteRes<WriteCMPsi, [SwiftUnitP01]>;
1143 def : WriteRes<WriteCMPsr, [SwiftUnitP01]>;
Arnold Schwaighofer6793aeb2013-04-01 13:07:05 +00001144}