blob: b03d5ff44c6e77c18928942292f1738835e83873 [file] [log] [blame]
Bob Wilsone8a549c2012-09-29 21:43:49 +00001//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the itinerary class data for the Swift processor.
11//
12//===----------------------------------------------------------------------===//
13
14// ===---------------------------------------------------------------------===//
15// This section contains legacy support for itineraries. This is
16// required until SD and PostRA schedulers are replaced by MachineScheduler.
17
// Functional units referenced by the Swift itineraries in this file.
// Every itinerary visible below starts with one or more SW_DIS* stages whose
// next-stage gap is 0 cycles, followed by stages on the execution units.
// NOTE(review): "DIS" presumably stands for dispatch slot -- confirm against
// the Swift tuning documentation.
18def SW_DIS0 : FuncUnit; // First stage of the itineraries; sole unit of IIC_Br / IIC_Preload.
19def SW_DIS1 : FuncUnit; // Claimed together with SW_DIS0 by multi-stage itineraries.
20def SW_DIS2 : FuncUnit; // Claimed in addition by the widest itineraries (e.g. IIC_iMUL64).
21
22def SW_ALU0 : FuncUnit; // Integer ALU stages; integer multiplies use this unit only.
23def SW_ALU1 : FuncUnit; // Integer ALU stages; FP convert / FP multiply use this unit only.
24def SW_LS : FuncUnit; // Used by the load and store itineraries.
25def SW_IDIV : FuncUnit; // Held for 14 cycles by IIC_iDIV.
26def SW_FDIV : FuncUnit; // Held for 15 or 30 cycles by the FP divide / sqrt itineraries.
27
28// FIXME: Need bypasses.
29// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
30// IIC_iMOVix2ld better.
31// FIXME: Model the special immediate shifts that are not microcoded.
32// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
33// to issue on pipe 1?
34// FIXME: Model the pipelined behavior of CMP / TST instructions.
35// FIXME: Better model the microcode stages of multiply instructions, especially
36// conditional variants.
37// FIXME: Add preload instruction when it is documented.
38// FIXME: Model non-pipelined nature of FP div / sqrt unit.
39
40def SwiftItineraries : ProcessorItineraries<
41 [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
42 //
43 // Move instructions, unconditional
44 InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
45 InstrStage<1, [SW_ALU0, SW_ALU1]>],
46 [1]>,
47 InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
48 InstrStage<1, [SW_ALU0, SW_ALU1]>],
49 [1]>,
50 InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
51 InstrStage<1, [SW_ALU0, SW_ALU1]>],
52 [1]>,
53 InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
54 InstrStage<1, [SW_ALU0, SW_ALU1]>],
55 [1]>,
56 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
57 InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
58 InstrStage<1, [SW_ALU0, SW_ALU1]>,
59 InstrStage<1, [SW_ALU0, SW_ALU1]>],
60 [2]>,
61 InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
62 InstrStage<1, [SW_ALU0, SW_ALU1]>,
63 InstrStage<1, [SW_ALU0, SW_ALU1]>,
64 InstrStage<1, [SW_ALU0, SW_ALU1]>],
65 [3]>,
66 InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
67 InstrStage<1, [SW_ALU0, SW_ALU1]>,
68 InstrStage<1, [SW_ALU0, SW_ALU1]>,
69 InstrStage<1, [SW_LS]>],
70 [5]>,
71 //
72 // MVN instructions
73 InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
74 InstrStage<1, [SW_ALU0, SW_ALU1]>],
75 [1]>,
76 InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
77 InstrStage<1, [SW_ALU0, SW_ALU1]>],
78 [1]>,
79 InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
80 InstrStage<1, [SW_ALU0, SW_ALU1]>],
81 [1]>,
82 InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
83 InstrStage<1, [SW_ALU0, SW_ALU1]>],
84 [1]>,
85 //
86 // No operand cycles
87 InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
88 InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
89 //
90 // Binary Instructions that produce a result
91 InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
92 InstrStage<1, [SW_ALU0, SW_ALU1]>],
93 [1, 1]>,
94 InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
95 InstrStage<1, [SW_ALU0, SW_ALU1]>],
96 [1, 1, 1]>,
97 InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
98 InstrStage<1, [SW_ALU0, SW_ALU1]>],
99 [2, 1, 1]>,
100 InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
101 InstrStage<1, [SW_ALU0, SW_ALU1]>],
102 [2, 1, 1]>,
103 InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
104 InstrStage<1, [SW_ALU0, SW_ALU1]>],
105 [2, 1, 1, 1]>,
106 //
107 // Bitwise Instructions that produce a result
108 InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
109 InstrStage<1, [SW_ALU0, SW_ALU1]>],
110 [1, 1]>,
111 InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
112 InstrStage<1, [SW_ALU0, SW_ALU1]>],
113 [1, 1, 1]>,
114 InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
115 InstrStage<1, [SW_ALU0, SW_ALU1]>],
116 [2, 1, 1]>,
117 InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
118 InstrStage<1, [SW_ALU0, SW_ALU1]>],
119 [2, 1, 1, 1]>,
120 //
121 // Unary Instructions that produce a result
122
123 // CLZ, RBIT, etc.
124 InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
125 InstrStage<1, [SW_ALU0, SW_ALU1]>],
126 [1, 1]>,
127
128 // BFC, BFI, UBFX, SBFX
129 InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
130 InstrStage<1, [SW_ALU0, SW_ALU1]>],
131 [2, 1]>,
132
133 //
134 // Zero and sign extension instructions
135 InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
136 InstrStage<1, [SW_ALU0, SW_ALU1]>],
137 [1, 1]>,
138 InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
139 InstrStage<1, [SW_ALU0, SW_ALU1]>],
140 [1, 1, 1]>,
141 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
142 InstrStage<1, [SW_ALU0, SW_ALU1]>],
143 [1, 1, 1, 1]>,
144 //
145 // Compare instructions
146 InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
147 InstrStage<1, [SW_ALU0, SW_ALU1]>],
148 [1]>,
149 InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
150 InstrStage<1, [SW_ALU0, SW_ALU1]>],
151 [1, 1]>,
152 InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
153 InstrStage<2, [SW_ALU0, SW_ALU1]>],
154 [1, 1]>,
155 InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
156 InstrStage<2, [SW_ALU0, SW_ALU1]>],
157 [1, 1, 1]>,
158 //
159 // Test instructions
160 InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
161 InstrStage<1, [SW_ALU0, SW_ALU1]>],
162 [1]>,
163 InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
164 InstrStage<1, [SW_ALU0, SW_ALU1]>],
165 [1, 1]>,
166 InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
167 InstrStage<2, [SW_ALU0, SW_ALU1]>],
168 [1, 1]>,
169 InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
170 InstrStage<2, [SW_ALU0, SW_ALU1]>],
171 [1, 1, 1]>,
172 //
173 // Move instructions, conditional
174 // FIXME: Correctly model the extra input dep on the destination.
175 InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
176 InstrStage<1, [SW_ALU0, SW_ALU1]>],
177 [1]>,
178 InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
179 InstrStage<1, [SW_ALU0, SW_ALU1]>],
180 [1, 1]>,
181 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
182 InstrStage<1, [SW_ALU0, SW_ALU1]>],
183 [1, 1]>,
184 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
185 InstrStage<1, [SW_ALU0, SW_ALU1]>],
186 [2, 1, 1]>,
187 InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
188 InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
189 InstrStage<1, [SW_ALU0, SW_ALU1]>,
190 InstrStage<1, [SW_ALU0, SW_ALU1]>],
191 [2]>,
192
193 // Integer multiply pipeline
194 //
195 InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
196 InstrStage<1, [SW_ALU0]>],
197 [3, 1, 1]>,
198 InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
199 InstrStage<1, [SW_ALU0]>],
200 [3, 1, 1, 1]>,
201 InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
202 InstrStage<1, [SW_ALU0]>],
203 [4, 1, 1]>,
204 InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
205 InstrStage<1, [SW_ALU0]>],
206 [4, 1, 1, 1]>,
207 InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>,
208 InstrStage<1, [SW_DIS1], 0>,
209 InstrStage<1, [SW_DIS2], 0>,
210 InstrStage<1, [SW_ALU0], 1>,
211 InstrStage<1, [SW_ALU0], 3>,
212 InstrStage<1, [SW_ALU0]>],
213 [5, 5, 1, 1]>,
214 InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>,
215 InstrStage<1, [SW_DIS1], 0>,
216 InstrStage<1, [SW_DIS2], 0>,
217 InstrStage<1, [SW_ALU0], 1>,
218 InstrStage<1, [SW_ALU0], 1>,
219 InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
220 InstrStage<1, [SW_ALU0, SW_ALU1]>],
221 [5, 6, 1, 1]>,
222 //
223 // Integer divide
224 InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
225 InstrStage<1, [SW_ALU0], 0>,
226 InstrStage<14, [SW_IDIV]>],
227 [14, 1, 1]>,
228
229 // Integer load pipeline
230 // FIXME: The timings are some rough approximations
231 //
232 // Immediate offset
233 InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
234 InstrStage<1, [SW_LS]>],
235 [3, 1]>,
236 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
237 InstrStage<1, [SW_LS]>],
238 [3, 1]>,
239 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
240 InstrStage<1, [SW_DIS1], 0>,
241 InstrStage<1, [SW_LS], 1>,
242 InstrStage<1, [SW_LS]>],
243 [3, 4, 1]>,
244 //
245 // Register offset
246 InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
247 InstrStage<1, [SW_LS]>],
248 [3, 1, 1]>,
249 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
250 InstrStage<1, [SW_LS]>],
251 [3, 1, 1]>,
252 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
253 InstrStage<1, [SW_DIS1], 0>,
254 InstrStage<1, [SW_DIS2], 0>,
255 InstrStage<1, [SW_LS], 1>,
256 InstrStage<1, [SW_LS], 3>,
257 InstrStage<1, [SW_ALU0, SW_ALU1]>],
258 [3, 4, 1, 1]>,
259 //
260 // Scaled register offset
261 InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>,
262 InstrStage<1, [SW_DIS1], 0>,
263 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
264 InstrStage<1, [SW_LS]>],
265 [5, 1, 1]>,
266 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
267 InstrStage<1, [SW_DIS1], 0>,
268 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
269 InstrStage<1, [SW_LS]>],
270 [5, 1, 1]>,
271 //
272 // Immediate offset with update
273 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>,
274 InstrStage<1, [SW_DIS1], 0>,
275 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
276 InstrStage<1, [SW_LS]>],
277 [3, 1, 1]>,
278 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
279 InstrStage<1, [SW_DIS1], 0>,
280 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
281 InstrStage<1, [SW_LS]>],
282 [3, 1, 1]>,
283 //
284 // Register offset with update
285 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>,
286 InstrStage<1, [SW_DIS1], 0>,
287 InstrStage<1, [SW_ALU0], 1>,
288 InstrStage<1, [SW_LS]>],
289 [3, 1, 1, 1]>,
290 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
291 InstrStage<1, [SW_DIS1], 0>,
292 InstrStage<1, [SW_ALU0], 1>,
293 InstrStage<1, [SW_LS]>],
294 [3, 1, 1, 1]>,
295 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
296 InstrStage<1, [SW_DIS1], 0>,
297 InstrStage<1, [SW_DIS2], 0>,
298 InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
299 InstrStage<1, [SW_LS], 3>,
300 InstrStage<1, [SW_LS], 0>,
301 InstrStage<1, [SW_ALU0, SW_ALU1]>],
302 [3, 4, 1, 1]>,
303 //
304 // Scaled register offset with update
305 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
306 InstrStage<1, [SW_DIS1], 0>,
307 InstrStage<1, [SW_DIS2], 0>,
308 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
309 InstrStage<1, [SW_LS], 3>,
310 InstrStage<1, [SW_ALU0, SW_ALU1]>],
311 [5, 3, 1, 1]>,
312 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
313 InstrStage<1, [SW_DIS1], 0>,
314 InstrStage<1, [SW_DIS2], 0>,
315 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
316 InstrStage<1, [SW_LS], 0>,
317 InstrStage<1, [SW_ALU0, SW_ALU1]>],
318 [5, 3, 1, 1]>,
319 //
320 // Load multiple, def is the 5th operand.
321 // FIXME: This assumes 3 to 4 registers.
322 InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>,
323 InstrStage<1, [SW_DIS1], 0>,
324 InstrStage<1, [SW_DIS2], 0>,
325 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
326 InstrStage<1, [SW_LS]>],
327 [1, 1, 1, 1, 3], [], -1>, // dynamic uops
328
329 //
330 // Load multiple + update, defs are the 1st and 5th operands.
331 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
332 InstrStage<1, [SW_DIS1], 0>,
333 InstrStage<1, [SW_DIS2], 0>,
334 InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
335 InstrStage<1, [SW_LS], 3>,
336 InstrStage<1, [SW_ALU0, SW_ALU1]>],
337 [2, 1, 1, 1, 3], [], -1>, // dynamic uops
338 //
339 // Load multiple plus branch
340 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
341 InstrStage<1, [SW_DIS1], 0>,
342 InstrStage<1, [SW_DIS2], 0>,
343 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
344 InstrStage<1, [SW_LS]>],
345 [1, 1, 1, 1, 3], [], -1>, // dynamic uops
346 //
347 // Pop, def is the 3rd operand.
348 InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>,
349 InstrStage<1, [SW_DIS1], 0>,
350 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
351 InstrStage<1, [SW_LS]>],
352 [1, 1, 3], [], -1>, // dynamic uops
353 //
354 // Pop + branch, def is the 3rd operand.
355 InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>,
356 InstrStage<1, [SW_DIS1], 0>,
357 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
358 InstrStage<1, [SW_LS]>],
359 [1, 1, 3], [], -1>, // dynamic uops
360
361 //
362 // iLoadi + iALUr for t2LDRpci_pic.
363 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
364 InstrStage<1, [SW_LS], 3>,
365 InstrStage<1, [SW_ALU0, SW_ALU1]>],
366 [4, 1]>,
367
368 // Integer store pipeline
369 //
370 // Immediate offset
371 InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
372 InstrStage<1, [SW_LS]>],
373 [1, 1]>,
374 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
375 InstrStage<1, [SW_LS]>],
376 [1, 1]>,
377 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
378 InstrStage<1, [SW_DIS1], 0>,
379 InstrStage<1, [SW_DIS2], 0>,
380 InstrStage<1, [SW_LS], 0>,
381 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
382 InstrStage<1, [SW_LS]>],
383 [1, 1]>,
384 //
385 // Register offset
386 InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
387 InstrStage<1, [SW_LS]>],
388 [1, 1, 1]>,
389 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
390 InstrStage<1, [SW_LS]>],
391 [1, 1, 1]>,
392 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
393 InstrStage<1, [SW_DIS1], 0>,
394 InstrStage<1, [SW_DIS2], 0>,
395 InstrStage<1, [SW_LS], 0>,
396 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
397 InstrStage<1, [SW_LS]>],
398 [1, 1, 1]>,
399 //
400 // Scaled register offset
401 InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>,
402 InstrStage<1, [SW_DIS1], 0>,
403 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
404 InstrStage<1, [SW_LS]>],
405 [1, 1, 1]>,
406 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
407 InstrStage<1, [SW_DIS1], 0>,
408 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
409 InstrStage<1, [SW_LS]>],
410 [1, 1, 1]>,
411 //
412 // Immediate offset with update
413 InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>,
414 InstrStage<1, [SW_DIS1], 0>,
415 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
416 InstrStage<1, [SW_LS]>],
417 [1, 1, 1]>,
418 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
419 InstrStage<1, [SW_DIS1], 0>,
420 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
421 InstrStage<1, [SW_LS]>],
422 [1, 1, 1]>,
423 //
424 // Register offset with update
425 InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>,
426 InstrStage<1, [SW_DIS1], 0>,
427 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
428 InstrStage<1, [SW_LS]>],
429 [1, 1, 1, 1]>,
430 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
431 InstrStage<1, [SW_DIS1], 0>,
432 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
433 InstrStage<1, [SW_LS]>],
434 [1, 1, 1, 1]>,
435 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
436 InstrStage<1, [SW_DIS1], 0>,
437 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
438 InstrStage<1, [SW_LS]>],
439 [1, 1, 1, 1]>,
440 //
441 // Scaled register offset with update
442 InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>,
443 InstrStage<1, [SW_DIS1], 0>,
444 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
445 InstrStage<1, [SW_LS], 0>,
446 InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
447 [3, 1, 1, 1]>,
448 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
449 InstrStage<1, [SW_DIS1], 0>,
450 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
451 InstrStage<1, [SW_LS], 0>,
452 InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
453 [3, 1, 1, 1]>,
454 //
455 // Store multiple
456 InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
457 InstrStage<1, [SW_DIS1], 0>,
458 InstrStage<1, [SW_DIS2], 0>,
459 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
460 InstrStage<1, [SW_LS], 1>,
461 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
462 InstrStage<1, [SW_LS], 1>,
463 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
464 InstrStage<1, [SW_LS]>],
465 [], [], -1>, // dynamic uops
466 //
467 // Store multiple + update
468 InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
469 InstrStage<1, [SW_DIS1], 0>,
470 InstrStage<1, [SW_DIS2], 0>,
471 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
472 InstrStage<1, [SW_LS], 1>,
473 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
474 InstrStage<1, [SW_LS], 1>,
475 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
476 InstrStage<1, [SW_LS]>],
477 [2], [], -1>, // dynamic uops
478
479 //
480 // Preload
481 InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
482
483 // Branch
484 //
485 // no delay slots, so the latency of a branch is unimportant
486 InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>,
487
488 // FP Special Register to Integer Register File Move
489 InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
490 InstrStage<1, [SW_ALU0, SW_ALU1]>],
491 [1]>,
492 //
493 // Single-precision FP Unary
494 //
495 // Most floating-point moves get issued on ALU0.
496 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
497 InstrStage<1, [SW_ALU0]>],
498 [2, 1]>,
499 //
500 // Double-precision FP Unary
501 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
502 InstrStage<1, [SW_ALU0]>],
503 [2, 1]>,
504
505 //
506 // Single-precision FP Compare
507 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
508 InstrStage<1, [SW_ALU0]>],
509 [1, 1]>,
510 //
511 // Double-precision FP Compare
512 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
513 InstrStage<1, [SW_ALU0]>],
514 [1, 1]>,
515 //
516 // Single to Double FP Convert
517 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
518 InstrStage<1, [SW_ALU1]>],
519 [4, 1]>,
520 //
521 // Double to Single FP Convert
522 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
523 InstrStage<1, [SW_ALU1]>],
524 [4, 1]>,
525
526 //
527 // Single to Half FP Convert
528 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
529 InstrStage<1, [SW_DIS1], 0>,
530 InstrStage<1, [SW_ALU1], 4>,
531 InstrStage<1, [SW_ALU1]>],
532 [6, 1]>,
533 //
534 // Half to Single FP Convert
535 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
536 InstrStage<1, [SW_ALU1]>],
537 [4, 1]>,
538
539 //
540 // Single-Precision FP to Integer Convert
541 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
542 InstrStage<1, [SW_ALU1]>],
543 [4, 1]>,
544 //
545 // Double-Precision FP to Integer Convert
546 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
547 InstrStage<1, [SW_ALU1]>],
548 [4, 1]>,
549 //
550 // Integer to Single-Precision FP Convert
551 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
552 InstrStage<1, [SW_ALU1]>],
553 [4, 1]>,
554 //
555 // Integer to Double-Precision FP Convert
556 InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
557 InstrStage<1, [SW_ALU1]>],
558 [4, 1]>,
559 //
560 // Single-precision FP ALU
561 InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
562 InstrStage<1, [SW_ALU0]>],
563 [2, 1, 1]>,
564 //
565 // Double-precision FP ALU
566 InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
567 InstrStage<1, [SW_ALU0]>],
568 [2, 1, 1]>,
569 //
570 // Single-precision FP Multiply
571 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
572 InstrStage<1, [SW_ALU1]>],
573 [4, 1, 1]>,
574 //
575 // Double-precision FP Multiply
576 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
577 InstrStage<1, [SW_ALU1]>],
578 [6, 1, 1]>,
579 //
580 // Single-precision FP MAC
581 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
582 InstrStage<1, [SW_ALU1]>],
583 [8, 1, 1]>,
584 //
585 // Double-precision FP MAC
586 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
587 InstrStage<1, [SW_ALU1]>],
588 [12, 1, 1]>,
589 //
590 // Single-precision Fused FP MAC
591 InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
592 InstrStage<1, [SW_ALU1]>],
593 [8, 1, 1]>,
594 //
595 // Double-precision Fused FP MAC
596 InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
597 InstrStage<1, [SW_ALU1]>],
598 [12, 1, 1]>,
599 //
600 // Single-precision FP DIV
601 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
602 InstrStage<1, [SW_ALU1], 0>,
603 InstrStage<15, [SW_FDIV]>],
604 [17, 1, 1]>,
605 //
606 // Double-precision FP DIV
607 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
608 InstrStage<1, [SW_ALU1], 0>,
609 InstrStage<30, [SW_FDIV]>],
610 [32, 1, 1]>,
611 //
612 // Single-precision FP SQRT
613 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
614 InstrStage<1, [SW_ALU1], 0>,
615 InstrStage<15, [SW_FDIV]>],
616 [17, 1]>,
617 //
618 // Double-precision FP SQRT
619 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
620 InstrStage<1, [SW_ALU1], 0>,
621 InstrStage<30, [SW_FDIV]>],
622 [32, 1, 1]>,
623
624 //
625 // Integer to Single-precision Move
626 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>,
627 InstrStage<1, [SW_DIS1], 0>,
628 InstrStage<1, [SW_LS], 4>,
629 InstrStage<1, [SW_ALU0]>],
630 [6, 1]>,
631 //
632 // Integer to Double-precision Move
633 InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
634 InstrStage<1, [SW_LS]>],
635 [4, 1]>,
636 //
637 // Single-precision to Integer Move
638 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
639 InstrStage<1, [SW_LS]>],
640 [3, 1]>,
641 //
642 // Double-precision to Integer Move
643 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>,
644 InstrStage<1, [SW_DIS1], 0>,
645 InstrStage<1, [SW_LS], 3>,
646 InstrStage<1, [SW_LS]>],
647 [3, 4, 1]>,
648 //
649 // Single-precision FP Load
650 InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
651 InstrStage<1, [SW_LS]>],
652 [4, 1]>,
653 //
654 // Double-precision FP Load
655 InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
656 InstrStage<1, [SW_LS]>],
657 [4, 1]>,
658 //
659 // FP Load Multiple
660 // FIXME: Assumes a single Q register.
661 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
662 InstrStage<1, [SW_LS]>],
663 [1, 1, 1, 4], [], -1>, // dynamic uops
664 //
665 // FP Load Multiple + update
666 // FIXME: Assumes a single Q register.
667 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
668 InstrStage<1, [SW_DIS1], 0>,
669 InstrStage<1, [SW_LS], 4>,
670 InstrStage<1, [SW_ALU0, SW_ALU1]>],
671 [2, 1, 1, 1, 4], [], -1>, // dynamic uops
672 //
673 // Single-precision FP Store
674 InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
675 InstrStage<1, [SW_LS]>],
676 [1, 1]>,
677 //
678 // Double-precision FP Store
679 InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
680 InstrStage<1, [SW_LS]>],
681 [1, 1]>,
682 //
683 // FP Store Multiple
684 // FIXME: Assumes a single Q register.
685 InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
686 InstrStage<1, [SW_LS]>],
687 [1, 1, 1], [], -1>, // dynamic uops
688 //
689 // FP Store Multiple + update
690 // FIXME: Assumes a single Q register.
691 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
692 InstrStage<1, [SW_DIS1], 0>,
693 InstrStage<1, [SW_LS], 4>,
694 InstrStage<1, [SW_ALU0, SW_ALU1]>],
695 [2, 1, 1, 1], [], -1>, // dynamic uops
696 // NEON
697 //
698 // Double-register Integer Unary
699 InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
700 InstrStage<1, [SW_ALU0]>],
701 [4, 1]>,
702 //
703 // Quad-register Integer Unary
704 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
705 InstrStage<1, [SW_ALU0]>],
706 [4, 1]>,
707 //
708 // Double-register Integer Q-Unary
709 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
710 InstrStage<1, [SW_ALU0]>],
711 [4, 1]>,
712 //
713 // Quad-register Integer Q-Unary
714 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
715 InstrStage<1, [SW_ALU0]>],
716 [4, 1]>,
717 //
718 // Double-register Integer Binary
719 InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
720 InstrStage<1, [SW_ALU0]>],
721 [2, 1, 1]>,
722 //
723 // Quad-register Integer Binary
724 InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
725 InstrStage<1, [SW_ALU0]>],
726 [2, 1, 1]>,
727 //
728 // Double-register Integer Subtract
729 InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
730 InstrStage<1, [SW_ALU0]>],
731 [2, 1, 1]>,
732 //
733 // Quad-register Integer Subtract
734 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
735 InstrStage<1, [SW_ALU0]>],
736 [2, 1, 1]>,
737 //
738 // Double-register Integer Shift
739 InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
740 InstrStage<1, [SW_ALU0]>],
741 [2, 1, 1]>,
742 //
743 // Quad-register Integer Shift
744 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
745 InstrStage<1, [SW_ALU0]>],
746 [2, 1, 1]>,
747 //
748 // Double-register Integer Shift (4 cycle)
749 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
750 InstrStage<1, [SW_ALU0]>],
751 [4, 1, 1]>,
752 //
753 // Quad-register Integer Shift (4 cycle)
754 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
755 InstrStage<1, [SW_ALU0]>],
756 [4, 1, 1]>,
757 //
758 // Double-register Integer Binary (4 cycle)
759 InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
760 InstrStage<1, [SW_ALU0]>],
761 [4, 1, 1]>,
762 //
763 // Quad-register Integer Binary (4 cycle)
764 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
765 InstrStage<1, [SW_ALU0]>],
766 [4, 1, 1]>,
767 //
768 // Double-register Integer Subtract (4 cycle)
769 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
770 InstrStage<1, [SW_ALU0]>],
771 [4, 1, 1]>,
772 //
773 // Quad-register Integer Subtract (4 cycle)
774 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
775 InstrStage<1, [SW_ALU0]>],
776 [4, 1, 1]>,
777
778 //
779 // Double-register Integer Count
780 InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
781 InstrStage<1, [SW_ALU0]>],
782 [2, 1, 1]>,
783 //
784 // Quad-register Integer Count
785 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
786 InstrStage<1, [SW_ALU0]>],
787 [2, 1, 1]>,
788 //
789 // Double-register Absolute Difference and Accumulate
790 InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
791 InstrStage<1, [SW_ALU0]>],
792 [4, 1, 1, 1]>,
793 //
794 // Quad-register Absolute Difference and Accumulate
795 InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
796 InstrStage<1, [SW_ALU0]>],
797 [4, 1, 1, 1]>,
798 //
799 // Double-register Integer Pair Add Long
800 InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
801 InstrStage<1, [SW_ALU0]>],
802 [4, 1, 1]>,
803 //
804 // Quad-register Integer Pair Add Long
805 InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
806 InstrStage<1, [SW_ALU0]>],
807 [4, 1, 1]>,
808
809 //
810 // Double-register Integer Multiply (.8, .16)
811 InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
812 InstrStage<1, [SW_ALU1]>],
813 [4, 1, 1]>,
814 //
815 // Quad-register Integer Multiply (.8, .16)
816 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
817 InstrStage<1, [SW_ALU1]>],
818 [4, 1, 1]>,
819
820 //
821 // Double-register Integer Multiply (.32)
822 InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
823 InstrStage<1, [SW_ALU1]>],
824 [4, 1, 1]>,
825 //
826 // Quad-register Integer Multiply (.32)
827 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
828 InstrStage<1, [SW_ALU1]>],
829 [4, 1, 1]>,
830 //
831 // Double-register Integer Multiply-Accumulate (.8, .16)
832 InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
833 InstrStage<1, [SW_ALU1]>],
834 [4, 1, 1, 1]>,
835 //
836 // Double-register Integer Multiply-Accumulate (.32)
837 InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
838 InstrStage<1, [SW_ALU1]>],
839 [4, 1, 1, 1]>,
840 //
841 // Quad-register Integer Multiply-Accumulate (.8, .16)
842 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
843 InstrStage<1, [SW_ALU1]>],
844 [4, 1, 1, 1]>,
845 //
846 // Quad-register Integer Multiply-Accumulate (.32)
847 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
848 InstrStage<1, [SW_ALU1]>],
849 [4, 1, 1, 1]>,
850
851 //
852 // Move
853 InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
854 InstrStage<1, [SW_ALU0]>],
855 [2, 1]>,
856 //
857 // Move Immediate
858 InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
859 InstrStage<1, [SW_ALU0]>],
860 [2]>,
861 //
862 // Double-register Permute Move
863 InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
864 InstrStage<1, [SW_ALU1]>],
865 [2, 1]>,
866 //
867 // Quad-register Permute Move
868 InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
869 InstrStage<1, [SW_ALU1]>],
870 [2, 1]>,
871 //
872 // Integer to Single-precision Move
873 InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>,
874 InstrStage<1, [SW_DIS1], 0>,
875 InstrStage<1, [SW_LS], 4>,
876 InstrStage<1, [SW_ALU0]>],
877 [6, 1]>,
878 //
879 // Integer to Double-precision Move
880 InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
881 InstrStage<1, [SW_LS]>],
882 [4, 1, 1]>,
883 //
884 // Single-precision to Integer Move
885 InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
886 InstrStage<1, [SW_LS]>],
887 [3, 1]>,
888 //
889 // Double-precision to Integer Move
890 InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>,
891 InstrStage<1, [SW_DIS1], 0>,
892 InstrStage<1, [SW_LS], 3>,
893 InstrStage<1, [SW_LS]>],
894 [3, 4, 1]>,
895 //
896 // Integer to Lane Move
897 // FIXME: I think this is correct, but it is not clear from the tuning guide.
898 InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
899 InstrStage<1, [SW_DIS1], 0>,
900 InstrStage<1, [SW_LS], 4>,
901 InstrStage<1, [SW_ALU0]>],
902 [6, 1]>,
903
904 //
905 // Vector narrow move
906 InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
907 InstrStage<1, [SW_ALU1]>],
908 [2, 1]>,
909 //
910 // Double-register FP Unary
911 // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
912 // and they issue on a different pipeline.
913 InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
914 InstrStage<1, [SW_ALU0]>],
915 [2, 1]>,
916 //
917 // Quad-register FP Unary
918 // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
919 // and they issue on a different pipeline.
920 InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
921 InstrStage<1, [SW_ALU0]>],
922 [2, 1]>,
923 //
924 // Double-register FP Binary
925 // FIXME: We're using this itin for many instructions.
926 InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
927 InstrStage<1, [SW_ALU0]>],
928 [4, 1, 1]>,
929
930 //
931 // VPADD, etc.
932 InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
933 InstrStage<1, [SW_ALU0]>],
934 [4, 1, 1]>,
935 //
936 // Double-register FP VMUL
937 InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
938 InstrStage<1, [SW_ALU1]>],
939 [4, 1, 1]>,
940 //
941 // Quad-register FP Binary
942 InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
943 InstrStage<1, [SW_ALU0]>],
944 [4, 1, 1]>,
945 //
946 // Quad-register FP VMUL
947 InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
948 InstrStage<1, [SW_ALU1]>],
949 [4, 1, 1]>,
950 //
951 // Double-register FP Multiply-Accumulate
952 InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
953 InstrStage<1, [SW_ALU1]>],
954 [8, 1, 1]>,
955 //
956 // Quad-register FP Multiply-Accumulate
957 InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
958 InstrStage<1, [SW_ALU1]>],
959 [8, 1, 1]>,
960 //
961 // Double-register Fused FP Multiply-Accumulate
962 InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
963 InstrStage<1, [SW_ALU1]>],
964 [8, 1, 1]>,
965 //
966 // Quad-register Fused FP Multiply-Accumulate
967 InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
968 InstrStage<1, [SW_ALU1]>],
969 [8, 1, 1]>,
970 //
971 // Double-register Reciprocal Step
972 InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
973 InstrStage<1, [SW_ALU1]>],
974 [8, 1, 1]>,
975 //
976 // Quad-register Reciprocal Step
977 InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
978 InstrStage<1, [SW_ALU1]>],
979 [8, 1, 1]>,
980 //
981 // Double-register Permute
982 // FIXME: The latencies are unclear from the documentation.
983 InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>,
984 InstrStage<1, [SW_DIS1], 0>,
985 InstrStage<1, [SW_DIS2], 0>,
986 InstrStage<1, [SW_ALU1], 2>,
987 InstrStage<1, [SW_ALU1], 2>,
988 InstrStage<1, [SW_ALU1]>],
989 [3, 4, 3, 4]>,
990 //
991 // Quad-register Permute
992 // FIXME: The latencies are unclear from the documentation.
993 InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>,
994 InstrStage<1, [SW_DIS1], 0>,
995 InstrStage<1, [SW_DIS2], 0>,
996 InstrStage<1, [SW_ALU1], 2>,
997 InstrStage<1, [SW_ALU1], 2>,
998 InstrStage<1, [SW_ALU1]>],
999 [3, 4, 3, 4]>,
1000 //
1001 // Quad-register Permute (3 cycle issue on A9)
1002 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>,
1003 InstrStage<1, [SW_DIS1], 0>,
1004 InstrStage<1, [SW_DIS2], 0>,
1005 InstrStage<1, [SW_ALU1], 2>,
1006 InstrStage<1, [SW_ALU1], 2>,
1007 InstrStage<1, [SW_ALU1]>],
1008 [3, 4, 3, 4]>,
1009
1010 //
1011 // Double-register VEXT
1012 InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1013 InstrStage<1, [SW_ALU1]>],
1014 [2, 1, 1]>,
1015 //
1016 // Quad-register VEXT
1017 InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1018 InstrStage<1, [SW_ALU1]>],
1019 [2, 1, 1]>,
1020 //
1021 // VTB
1022 InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1023 InstrStage<1, [SW_ALU1]>],
1024 [2, 1, 1]>,
1025 InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>,
1026 InstrStage<1, [SW_DIS1], 0>,
1027 InstrStage<1, [SW_ALU1], 2>,
1028 InstrStage<1, [SW_ALU1]>],
1029 [4, 1, 3, 3]>,
1030 InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>,
1031 InstrStage<1, [SW_DIS1], 0>,
1032 InstrStage<1, [SW_DIS2], 0>,
1033 InstrStage<1, [SW_ALU1], 2>,
1034 InstrStage<1, [SW_ALU1], 2>,
1035 InstrStage<1, [SW_ALU1]>],
1036 [6, 1, 3, 5, 5]>,
1037 InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>,
1038 InstrStage<1, [SW_DIS1], 0>,
1039 InstrStage<1, [SW_DIS2], 0>,
1040 InstrStage<1, [SW_ALU1], 2>,
1041 InstrStage<1, [SW_ALU1], 2>,
1042 InstrStage<1, [SW_ALU1], 2>,
1043 InstrStage<1, [SW_ALU1]>],
1044 [8, 1, 3, 5, 7, 7]>,
1045 //
1046 // VTBX
1047 InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1048 InstrStage<1, [SW_ALU1]>],
1049 [2, 1, 1]>,
1050 InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>,
1051 InstrStage<1, [SW_DIS1], 0>,
1052 InstrStage<1, [SW_ALU1], 2>,
1053 InstrStage<1, [SW_ALU1]>],
1054 [4, 1, 3, 3]>,
1055 InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>,
1056 InstrStage<1, [SW_DIS1], 0>,
1057 InstrStage<1, [SW_DIS2], 0>,
1058 InstrStage<1, [SW_ALU1], 2>,
1059 InstrStage<1, [SW_ALU1], 2>,
1060 InstrStage<1, [SW_ALU1]>],
1061 [6, 1, 3, 5, 5]>,
1062 InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>,
1063 InstrStage<1, [SW_DIS1], 0>,
1064 InstrStage<1, [SW_DIS2], 0>,
1065 InstrStage<1, [SW_ALU1], 2>,
1066 InstrStage<1, [SW_ALU1], 2>,
1067 InstrStage<1, [SW_ALU1], 2>,
1068 InstrStage<1, [SW_ALU1]>],
1069 [8, 1, 3, 5, 7, 7]>
1070]>;
1071
1072// ===---------------------------------------------------------------------===//
1073 // The following definitions describe the simple machine model which
1074// will replace itineraries.
1075
1076// Swift machine model for scheduling and other instruction cost heuristics.
// Machine model record for the Swift core. It still points at the legacy
// itinerary table (SwiftItineraries) in addition to the per-operand model.
def SwiftModel : SchedMachineModel {
  // Legacy itinerary data.
  let Itineraries = SwiftItineraries;

  // 3 micro-ops are dispatched per cycle.
  let IssueWidth = 3;
  // Based on NEON renamed registers.
  let MicroOpBufferSize = 45;
  let LoadLatency = 3;
  // A branch direction mispredict.
  let MispredictPenalty = 14;
}
1085
// Swift predicates.
// Holds when the target hook isSwiftFastImmShift accepts the instruction.
def IsFastImmShiftSwiftPred
    : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
1088
Arnold Schwaighofer6793aeb2013-04-01 13:07:05 +00001089// Swift resource mapping.
1090let SchedModel = SwiftModel in {
// Processor resources.
// SwiftUnitP01 has two units; P0 and P1 each name it as their Super resource.
def SwiftUnitP01 : ProcResource<2>;  // ALU unit.
def SwiftUnitP0 : ProcResource<1> {  // Mul unit.
  let Super = SwiftUnitP01;
}
def SwiftUnitP1 : ProcResource<1> {  // Br unit.
  let Super = SwiftUnitP01;
}
def SwiftUnitP2 : ProcResource<1>;   // LS unit.
def SwiftUnitDiv : ProcResource<1>;
1097
// Generic resource requirements.
// Naming scheme: SwiftWrite<units><latency>[<uop count>].

// Writes issued on P0.
def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>;
def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]> {
  let Latency = 2;
}
def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]> {
  let Latency = 4;
}
def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]> {
  let Latency = 6;
}

// Writes that occupy both P0 and P1.
def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> { let Latency = 4; }
def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> { let Latency = 6; }

def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>;

// Writes issued on P1.
def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]> {
  let Latency = 2;
}
def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]> {
  let Latency = 4;
}
def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]> {
  let Latency = 6;
}
def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]> {
  let Latency = 8;
}
def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]> {
  let Latency = 12;
}

// Repeated single-cycle P01 writes.
def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>;
def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>;

def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> {
  let Latency = 2;
}
// Two micro-ops, each taking a P01 unit.
def SwiftWriteP01ThreeCycleTwoUops
    : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]> {
  let NumMicroOps = 2;
  let Latency = 3;
}
// Three micro-ops holding P0 for three cycles.
def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
  let NumMicroOps = 3;
  let ResourceCycles = [3];
  let Latency = 3;
}
// Plain load without writeback.
def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> { let Latency = 3; }
def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> { let Latency = 4; }

// A store does not write to a register.
def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> { let Latency = 0; }

// SwiftWrite1xP2 .. SwiftWrite4xP2: the store write repeated Num times.
foreach Num = 1-4 in {
  def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>;
}

// Two single-cycle P01 writes followed by a three-cycle load.
def SwiftWriteP01OneCycle2x_load
    : WriteSequence<[SwiftWriteP01OneCycle, SwiftWriteP01OneCycle,
                     SwiftWriteP2ThreeCycle]>;
// 4.2.4 Arithmetic and Logical.

// ALU operation register shifted by immediate variant.
// NOTE(review): the fast-shift case selects the two-cycle write here, while
// SwiftChooseShiftKindP01OneOrTwoCycle gives fast shifts the one-cycle write;
// confirm this asymmetry is intended.
def SwiftWriteALUsi : SchedWriteVariant<[
  // lsl #2, lsl #1, or lsr #1.
  SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
  SchedVar<NoSchedPred,             [WriteALU]>
]>;

// Register-shifted-register forms; the predicated case is checked first.
def SwiftWriteALUsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
  SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
]>;
def SwiftWriteALUSsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
  SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
]>;
def SwiftReadAdvanceALUsr : SchedReadVariant<[
  SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
  SchedVar<NoSchedPred,      [NoReadAdvance]>
]>;

// ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
// AND,BIC,EOR,ORN,ORR
// CLZ,RBIT,REV,REV16,REVSH,PKH
def : WriteRes<WriteALU, [SwiftUnitP01]>;

// Bind the generic ARM read/write types to the Swift definitions above.
def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
def : ReadAdvance<ReadALU, 0>;
def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
Arnold Schwaighoferfb6b9f42013-04-05 05:01:06 +00001173
Arnold Schwaighofer2773f1d2013-06-05 16:06:11 +00001174
// One cycle on P01 for the fast immediate shifts, two cycles otherwise.
def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
  SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>,
  SchedVar<NoSchedPred,             [SwiftWriteP01TwoCycle]>
]>;

// 4.2.5 Integer comparison
def : WriteRes<WriteCMP, [SwiftUnitP01]>;
def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>;
def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>;
Arnold Schwaighofer41a7b062013-06-06 19:49:46 +00001184
// 4.2.6 Shift, Move
//   Shift
//     ASR,LSL,ROR,RRX
//     MOV(register-shiftedregister) MVN(register-shiftedregister)
//   Move
//     MOV,MVN
//     MOVT
//   Sign/Zero extension
def : InstRW<[SwiftWriteP01OneCycle],
             (instregex
               "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
               "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;

// Pseudo instructions.
def : InstRW<[SwiftWriteP01OneCycle2x],
             (instregex
               "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn",
               "t2MOVCCi32imm", "t2MOVi32imm", "t2MOV_ga_dyn")>;
def : InstRW<[SwiftWriteP01OneCycle3x],
             (instregex
               "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>;
def : InstRW<[SwiftWriteP01OneCycle2x_load],
             (instregex
               "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
1205
// Two back-to-back single-cycle P0 writes.
def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;

// Predicated instructions take the two-write sequence; otherwise one cycle.
def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [SwiftWriteP0TwoCyleTwoUops]>,
  SchedVar<NoSchedPred,      [SwiftWriteP0OneCycle]>
]>;

// 4.2.7 Select
// SEL
def : InstRW<[SwiftPredP0OneOrTwoCycle],
             (instregex "SEL", "t2SEL")>;
1216
// 4.2.8 Bitfield
// BFI,BFC, SBFX,UBFX
def : InstRW<[SwiftWriteP01TwoCycle],
             (instregex
               "BFC", "BFI", "UBFX", "SBFX",
               "(t|t2)BFC", "(t|t2)BFI", "(t|t2)UBFX", "(t|t2)SBFX")>;

// 4.2.9 Saturating arithmetic
def : InstRW<[SwiftWriteP01TwoCycle],
             (instregex
               "QADD", "QSUB", "QDADD", "QDSUB",
               "SSAT", "SSAT16", "USAT", "USAT16",
               "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
               "UQADD8", "UQADD16", "UQSUB8", "UQSUB16", "UQASX", "UQSAX",
               "t2QADD", "t2QSUB", "t2QDADD", "t2QDSUB",
               "t2SSAT", "t2SSAT16", "t2USAT",
               "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX",
               "t2QSAX",
               "t2UQADD8", "t2UQADD16", "t2UQSUB8", "t2UQSUB16", "t2UQASX",
               "t2UQSAX")>;
1231
// 4.2.10 Parallel Arithmetic
// NOTE(review): the "flag setting" labels below look swapped relative to the
// ARM ISA (SADD8 and friends update the GE flags, the halving forms do not);
// confirm against the tuning guide before relying on them.

// Not flag setting.
def : InstRW<[SwiftWriteALUsr],
             (instregex
               "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
               "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX",
               "t2SADD8", "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX",
               "t2SSAX",
               "t2UADD8", "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX",
               "t2USAX")>;

// Flag setting.
def : InstRW<[SwiftWriteP01TwoCycle],
             (instregex
               "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
               "SXTAB", "SXTAB16", "SXTAH",
               "UHADD8", "UHADD16", "UHSUB8", "UHSUB16", "UHASX", "UHSAX",
               "UXTAB", "UXTAB16", "UXTAH",
               "t2SHADD8", "t2SHADD16", "t2SHSUB8", "t2SHSUB16", "t2SHASX",
               "t2SHSAX",
               "t2SXTAB", "t2SXTAB16", "t2SXTAH",
               "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
               "t2UHSAX",
               "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
1247
// 4.2.11 Sum of Absolute Difference
def : InstRW<[SwiftWriteP0P1FourCycle],
             (instregex "USAD8")>;
// Accumulating form: the last listed read uses a read advance of 2.
def : InstRW<[SwiftWriteP0P1FourCycle,
              ReadALU, ReadALU, SchedReadAdvance<2>],
             (instregex "USADA8")>;

// 4.2.12 Integer Multiply (32-bit result)
// Two sources.
def : InstRW<[SwiftWriteP0FourCycle],
             (instregex
               "MULS", "MUL", "SMMUL", "SMMULR",
               "SMULBB", "SMULBT", "SMULTB", "SMULTT",
               "SMULWB", "SMULWT", "SMUSD", "SMUSDXi",
               "t2MUL", "t2SMMUL", "t2SMMULR",
               "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
               "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
1260
// Five-cycle write made of two micro-ops: one on P0 and one on P01.
// NumMicroOps is set explicitly so the micro-op count matches the record's
// name and its two-entry resource list, as the other multi-resource writes in
// this model do; without it the count falls back to the default of 1.
def SwiftWriteP0P01FiveCycleTwoUops :
    SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
  let Latency = 5;
  let NumMicroOps = 2;
}
1265
// Predicated multiply-accumulate uses the five-cycle P0+P01 write; the
// unpredicated form uses the plain four-cycle P0 write.
def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [SwiftWriteP0P01FiveCycleTwoUops]>,
  SchedVar<NoSchedPred,      [SwiftWriteP0FourCycle]>
]>;

// Read advance of 4 when predicated, otherwise the ordinary ALU read.
def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[
  SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>,
  SchedVar<NoSchedPred,      [ReadALU]>
]>;

// Multiply accumulate, three sources
def : InstRW<[SwiftPredP0P01FourFiveCycle,
              ReadALU, ReadALU, SwiftReadAdvanceFourCyclesPred],
             (instregex
               "MLAS", "MLA", "MLS",
               "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
               "t2MLA", "t2MLS", "t2MLAS",
               "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR")>;

// 4.2.13 Integer Multiply (32-bit result, Q flag)
def : InstRW<[SwiftWriteP0FourCycle],
             (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>;
def : InstRW<[SwiftPredP0P01FourFiveCycle,
              ReadALU, ReadALU, SwiftReadAdvanceFourCyclesPred],
             (instregex
               "SMLABB", "SMLABT", "SMLATB", "SMLATT",
               "SMLSD", "SMLSDX", "SMLAWB", "SMLAWT",
               "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
               "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>;
def : InstRW<[SwiftPredP0P01FourFiveCycle],
             (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>;
1293
// Three micro-ops: P0 is held for two cycles and P01 for one.
def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
  let Latency = 5;
}

// Latency-only writes: no resources and no micro-ops.
def SwiftWrite1Cycle : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
def SwiftWrite5Cycle : SchedWriteRes<[]> { let Latency = 5; let NumMicroOps = 0; }
def SwiftWrite6Cycle : SchedWriteRes<[]> { let Latency = 6; let NumMicroOps = 0; }

// 4.2.14 Integer Multiply, Long
// The second write is latency-only.
def : InstRW<[SwiftP0P0P01FiveCycle, SwiftWrite5Cycle],
             (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>;
1315
// Five micro-ops: P0 is held for two cycles and P01 for three.
// Note: despite "FiveCycle" in the record name, the latency is 7.
def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
  let NumMicroOps = 5;
  let ResourceCycles = [2, 3];
  let Latency = 7;
}
1321
// 4.2.15 Integer Multiply Accumulate, Long
// 4.2.16 Integer Multiply Accumulate, Dual
// 4.2.17 Integer Multiply Accumulate Accumulate, Long
// We are being a bit inaccurate here.
//
// The first write is latency-only; the second carries the resources. The last
// two reads use read advances of 4 and 3.
// The BB halfword forms are spelled "SMLALBB" / "t2SMLALBB" to match their
// BT/TB/TT siblings; the previous "MLALBB" / "t2MLALBB" spellings did not name
// any instruction.
def : InstRW<[SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU,
              SchedReadAdvance<4>, SchedReadAdvance<3>],
             (instregex
               "SMLALS", "UMLALS", "SMLAL", "UMLAL",
               "SMLALBB", "SMLALBT", "SMLALTB", "SMLALTT",
               "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX", "UMAAL",
               "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL",
               "t2SMLALBB", "t2SMLALBT", "t2SMLALTB", "t2SMLALTT",
               "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX",
               "t2UMAAL")>;
1333
// One micro-op: P0 for a single cycle, the divide unit for 14 cycles.
def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
  let Latency = 14;
  let ResourceCycles = [1, 14];
  let NumMicroOps = 1;
}

// 4.2.18 Integer Divide
def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
def : InstRW<[SwiftDiv],
             (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
Arnold Schwaighoferdff8e4c2013-06-06 20:11:56 +00001343
// 4.2.19 Integer Load Single Element
// 4.2.20 Integer Load Signextended

// Load writes that use P2 plus one or two P01 micro-ops.
def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
  let NumMicroOps = 2;
  let Latency = 3;
}
def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
  let NumMicroOps = 2;
  let Latency = 4;
}
def SwiftWriteP2P01P01FourCycle
    : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01, SwiftUnitP01]> {
  let NumMicroOps = 3;
  let Latency = 4;
}

// Load writes that use P2 twice.
def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> {
  let NumMicroOps = 2;
  let Latency = 3;
}
def SwiftWriteP2P2P01ThreeCycle
    : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2, SwiftUnitP01]> {
  let NumMicroOps = 3;
  let Latency = 3;
}

// Latency-only writes: no resources and no micro-ops.
def SwiftWrBackOne : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
def SwiftWriteLdFour : SchedWriteRes<[]> { let Latency = 4; let NumMicroOps = 0; }
// Not accurate.
def : InstRW<[SwiftWriteP2ThreeCycle],
             (instregex
               "LDR(i12|rs)$", "LDRB(i12|rs)$",
               "t2LDR(i8|i12|s|pci)", "t2LDR(H|B)(i8|i12|s|pci)",
               "LDREX",
               "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)")>;
def : InstRW<[SwiftWriteP2ThreeCycle],
             (instregex "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
def : InstRW<[SwiftWriteP2P01FourCyle],
             (instregex
               "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
               "t2LDRpci_pic", "tLDRS(B|H)")>;
// The second write in the two entries below is the latency-only
// SwiftWrBackOne.
def : InstRW<[SwiftWriteP2P01ThreeCycle, SwiftWrBackOne],
             (instregex
               "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
               "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
               "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>;
def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne],
             (instregex
               "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
               "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>;

// 4.2.21 Integer Dual Load
// Not accurate.
def : InstRW<[SwiftWriteP2P2ThreeCycle, SwiftWriteLdFour],
             (instregex "t2LDRDi8", "LDRD$")>;
def : InstRW<[SwiftWriteP2P2P01ThreeCycle, SwiftWriteLdFour, SwiftWrBackOne],
             (instregex "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
1400
// 4.2.22 Integer Load, Multiple
// NumReg = 1 .. 16
// For each latency 3..25, define a write that uses P2 (SwiftWriteLM<Lat>Cy)
// and a latency-only twin with no resources or micro-ops
// (SwiftWriteLM<Lat>CyNo).
foreach Lat = 3-25 in {
  def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> { let Latency = Lat; }
  def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> {
    let NumMicroOps = 0;
    let Latency = Lat;
  }
}

// Predicate.
// SwiftLMAddr<N>Pred compares TII->getNumLDMAddresses(MI) against N.
foreach NumAddr = 1-16 in {
  def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
}

// Address micro-op writes, without and with base writeback.
def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> {
  let Latency = 0;
}
def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>;
// Variadic write for load-multiple. The variant for N addresses lists N
// writes with latencies 3, 4, ..., N+2; the first matching predicate wins.
def SwiftWriteLM : SchedWriteVariant<[
  SchedVar<SwiftLMAddr2Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy]>,
  SchedVar<SwiftLMAddr3Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy]>,
  SchedVar<SwiftLMAddr4Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy]>,
  SchedVar<SwiftLMAddr5Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy,
    SwiftWriteLM7Cy]>,
  SchedVar<SwiftLMAddr6Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy,
    SwiftWriteLM7Cy, SwiftWriteLM8Cy]>,
  SchedVar<SwiftLMAddr7Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy,
    SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM9Cy]>,
  SchedVar<SwiftLMAddr8Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy,
    SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10Cy]>,
  SchedVar<SwiftLMAddr9Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy,
    SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10Cy,
    SwiftWriteLM11Cy]>,
  SchedVar<SwiftLMAddr10Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy,
    SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10Cy,
    SwiftWriteLM11Cy, SwiftWriteLM12Cy]>,
  SchedVar<SwiftLMAddr11Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy,
    SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10Cy,
    SwiftWriteLM11Cy, SwiftWriteLM12Cy, SwiftWriteLM13Cy]>,
  SchedVar<SwiftLMAddr12Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy,
    SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10Cy,
    SwiftWriteLM11Cy, SwiftWriteLM12Cy, SwiftWriteLM13Cy, SwiftWriteLM14Cy]>,
  SchedVar<SwiftLMAddr13Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy,
    SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10Cy,
    SwiftWriteLM11Cy, SwiftWriteLM12Cy, SwiftWriteLM13Cy, SwiftWriteLM14Cy,
    SwiftWriteLM15Cy]>,
  SchedVar<SwiftLMAddr14Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy,
    SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10Cy,
    SwiftWriteLM11Cy, SwiftWriteLM12Cy, SwiftWriteLM13Cy, SwiftWriteLM14Cy,
    SwiftWriteLM15Cy, SwiftWriteLM16Cy]>,
  SchedVar<SwiftLMAddr15Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy,
    SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10Cy,
    SwiftWriteLM11Cy, SwiftWriteLM12Cy, SwiftWriteLM13Cy, SwiftWriteLM14Cy,
    SwiftWriteLM15Cy, SwiftWriteLM16Cy, SwiftWriteLM17Cy]>,
  SchedVar<SwiftLMAddr16Pred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy, SwiftWriteLM5Cy, SwiftWriteLM6Cy,
    SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10Cy,
    SwiftWriteLM11Cy, SwiftWriteLM12Cy, SwiftWriteLM13Cy, SwiftWriteLM14Cy,
    SwiftWriteLM15Cy, SwiftWriteLM16Cy, SwiftWriteLM17Cy, SwiftWriteLM18Cy]>,
  // Unknown number of registers, just use resources for two registers.
  // Only the first two writes use P2; the remaining ones are the
  // latency-only "No" variants.
  SchedVar<NoSchedPred, [
    SwiftWriteLM3Cy, SwiftWriteLM4Cy,
    SwiftWriteLM5CyNo, SwiftWriteLM6CyNo, SwiftWriteLM7CyNo,
    SwiftWriteLM8CyNo, SwiftWriteLM9CyNo, SwiftWriteLM10CyNo,
    SwiftWriteLM11CyNo, SwiftWriteLM12CyNo, SwiftWriteLM13CyNo,
    SwiftWriteLM14CyNo, SwiftWriteLM15CyNo, SwiftWriteLM16CyNo,
    SwiftWriteLM17CyNo, SwiftWriteLM18CyNo]>
]> {
  let Variadic=1;
}
1501
// Load-multiple without base writeback.
def : InstRW<[SwiftWriteLM, SwiftWriteLDMAddrNoWB],
             (instregex
               "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
               "(t|sys)LDM(IA|DA|DB|IB)$")>;
// Load-multiple with base writeback. The *_RET forms are not listed here;
// they are covered by the following entry.
def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM],
             (instregex
               "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
// Returns and pops additionally carry a two-cycle P1 write.
def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle],
             (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
// 4.2.23 Integer Store, Single Element
def : InstRW<[SwiftWriteP2],
             (instregex
               "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
               "t2STR(i12|i8|s)$", "t2STR[BH](i12|i8|s)$",
               "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;

// Pre/post-indexed and unprivileged forms add a single-cycle P01 write.
def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2],
             (instregex
               "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
               "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx",
               "STR(H_|HT_)(PRE|POST)", "STR(BT|HT|T)",
               "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
               "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;

// 4.2.24 Integer Store, Dual
def : InstRW<[SwiftWriteP2, SwiftWriteP2, SwiftWriteP01OneCycle],
             (instregex "STRD$", "t2STRDi8")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2, SwiftWriteP2,
              SwiftWriteP01OneCycle],
             (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
1527
// 4.2.25 Integer Store, Multiple

// One store step: two micro-ops, on P2 and P01, with zero latency.
def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
  let NumMicroOps = 2;
  let Latency = 0;
}

// SwiftWriteSTM1 .. SwiftWriteSTM16: the store step repeated NumAddr times.
foreach NumAddr = 1-16 in {
  def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
}

// Pick the sequence matching the address count; the first match wins.
def SwiftWriteSTM : SchedWriteVariant<[
  SchedVar<SwiftLMAddr2Pred,  [SwiftWriteSTM2]>,
  SchedVar<SwiftLMAddr3Pred,  [SwiftWriteSTM3]>,
  SchedVar<SwiftLMAddr4Pred,  [SwiftWriteSTM4]>,
  SchedVar<SwiftLMAddr5Pred,  [SwiftWriteSTM5]>,
  SchedVar<SwiftLMAddr6Pred,  [SwiftWriteSTM6]>,
  SchedVar<SwiftLMAddr7Pred,  [SwiftWriteSTM7]>,
  SchedVar<SwiftLMAddr8Pred,  [SwiftWriteSTM8]>,
  SchedVar<SwiftLMAddr9Pred,  [SwiftWriteSTM9]>,
  SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM10]>,
  SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM11]>,
  SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM12]>,
  SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM13]>,
  SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM14]>,
  SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM15]>,
  SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM16]>,
  // Unknown number of registers, just use resources for two registers.
  SchedVar<NoSchedPred,       [SwiftWriteSTM2]>
]>;

def : InstRW<[SwiftWriteSTM],
             (instregex
               "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
// Writeback forms and pushes add a single-cycle P01 write.
def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM],
             (instregex
               "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
               "PUSH", "tPUSH")>;
1560
// 4.2.26 Branch
def : WriteRes<WriteBr, [SwiftUnitP1]> {
  let Latency = 0;
}
def : WriteRes<WriteBrL, [SwiftUnitP1]> {
  let Latency = 2;
}
def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> {
  let Latency = 0;
}

// 4.2.27 Not issued
// No resources, no micro-ops, zero latency.
def : WriteRes<WriteNoop, []> {
  let NumMicroOps = 0;
  let Latency = 0;
}
def : InstRW<[WriteNoop],
             (instregex "t2IT", "IT", "NOP")>;
1569
// 4.2.28 Advanced SIMD, Integer, 2 cycle
def : InstRW<[SwiftWriteP0TwoCycle],
             (instregex
               "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL", "VADDW",
               "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi", "VPADDL",
               "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
               "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU",
               "VBIF", "VBIT", "VBSL", "VSLI", "VSRI",
               "VCLS", "VCLZ", "VCNT")>;

// Two-cycle operations issued on P1 instead of P0.
def : InstRW<[SwiftWriteP1TwoCycle],
             (instregex "VEXT", "VREV16", "VREV32", "VREV64")>;
1580
// 4.2.29 Advanced SIMD, Integer, 4 cycle
// 4.2.30 Advanced SIMD, Integer with Accumulate
def : InstRW<[SwiftWriteP0FourCycle],
             (instregex
               "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA",
               "VACGE", "VACGT", "VACLE", "VACLT",
               "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT",
               "VRSHL", "VQRSHL", "VRSHR(u|s)",
               "VABS(f|v)", "VQABS", "VQNEG", "VQADD", "VQSUB")>;
// Four-cycle estimates issued on P1.
def : InstRW<[SwiftWriteP1FourCycle],
             (instregex "VRECPE", "VRSQRTE")>;
1590
// 4.2.31 Advanced SIMD, Add and Shift with Narrow
// Both entries occupy P0 and P1; they differ in latency (4 vs. 6).
def : InstRW<[SwiftWriteP0P1FourCycle],
             (instregex "VADDHN", "VSUBHN", "VSHRN")>;
def : InstRW<[SwiftWriteP0P1SixCycle],
             (instregex
               "VRADDHN", "VRSUBHN", "VRSHRN",
               "VQSHRN", "VQSHRUN", "VQRSHRN", "VQRSHRUN")>;
1597
// 4.2.32 Advanced SIMD, Vector Table Lookup
// SwiftWrite1xP1TwoCycle .. SwiftWrite4xP1TwoCycle: the two-cycle P1 write
// repeated Num times.
foreach Num = 1-4 in {
  def SwiftWrite#Num#xP1TwoCycle : WriteSequence<[SwiftWriteP1TwoCycle], Num>;
}
// The digit in the opcode pattern selects the sequence length.
def : InstRW<[SwiftWrite1xP1TwoCycle], (instregex "VTB(L|X)1")>;
def : InstRW<[SwiftWrite2xP1TwoCycle], (instregex "VTB(L|X)2")>;
def : InstRW<[SwiftWrite3xP1TwoCycle], (instregex "VTB(L|X)3")>;
def : InstRW<[SwiftWrite4xP1TwoCycle], (instregex "VTB(L|X)4")>;
1610
// 4.2.33 Advanced SIMD, Transpose
// Two four-cycle P1 writes, then a two-cycle P1 write marked RsrcOnly by the
// original author, then a read advance of 2.
def : InstRW<[SwiftWriteP1FourCycle,
              SwiftWriteP1FourCycle,
              SwiftWriteP1TwoCycle /*RsrcOnly*/,
              SchedReadAdvance<2>],
             (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
1615
// 4.2.34 Advanced SIMD and VFP, Floating Point
def : InstRW<[SwiftWriteP0TwoCycle],
             (instregex "VABS(S|D)$", "VNEG(S|D)$")>;
def : InstRW<[SwiftWriteP0FourCycle],
             (instregex "VCMP(D|S|ZD|ZS)$", "VCMPE(D|S|ZD|ZS)")>;
def : InstRW<[SwiftWriteP0FourCycle],
             (instregex
               "VADD(S|f)", "VSUB(S|f)", "VABD", "VPADDf",
               "VMAX", "VMIN", "VPMAX", "VPMIN")>;
def : InstRW<[SwiftWriteP0SixCycle],
             (instregex "VADDD$", "VSUBD$")>;
def : InstRW<[SwiftWriteP1EightCycle],
             (instregex "VRECPS", "VRSQRTS")>;
1625
// 4.2.35 Advanced SIMD and VFP, Multiply
// Multiplies other than the D-register scalar forms.
def : InstRW<
  [SwiftWriteP1FourCycle],
  (instregex "VMUL(S|v|p|f|s)", "VNMULS",
             "VQDMULH", "VQRDMULH",
             "VMULL", "VQDMULL")
>;
// VMULD / VNMULD.
def : InstRW<
  [SwiftWriteP1SixCycle],
  (instregex "VMULD", "VNMULD")
>;
// Multiply-accumulate and fused multiply patterns.
def : InstRW<
  [SwiftWriteP1FourCycle],
  (instregex "VMLA", "VMLS", "VNMLA", "VNMLS",
             "VFMA(S|D)", "VFMS(S|D)", "VFNMA", "VFNMS",
             "VMLAL", "VMLSL", "VQDMLAL", "VQDMLSL")
>;
// The "fd" and "fq" VFMA/VFMS forms get their own, longer writes.
def : InstRW<
  [SwiftWriteP1EightCycle],
  (instregex "VFMAfd", "VFMSfd")
>;
def : InstRW<
  [SwiftWriteP1TwelveCyc],
  (instregex "VFMAfq", "VFMSfq")
>;
1637
// 4.2.36 Advanced SIMD and VFP, Convert
def : InstRW<
  [SwiftWriteP1FourCycle],
  (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")
>;
// Fixpoint conversions: WriteCvtFP is mapped onto SwiftUnitP1 with a
// latency of 4.
def : WriteRes<WriteCvtFP, [SwiftUnitP1]> {
  let Latency = 4;
}
Arnold Schwaighofereac54472013-06-06 20:26:18 +00001642
// 4.2.37 Advanced SIMD and VFP, Move
// Several entries below build their write inline as an anonymous
// WriteSequence over the listed writes.
def : InstRW<
  [SwiftWriteP0TwoCycle],
  (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc",
             "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc",
             "FCONST(D|S)")
>;
def : InstRW<
  [SwiftWriteP1TwoCycle],
  (instregex "VMOVN", "VMOVL")
>;
def : InstRW<
  [WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>],
  (instregex "VQMOVN")
>;
def : InstRW<
  [SwiftWriteP1TwoCycle],
  (instregex "VDUPLN", "VDUPf")
>;
def : InstRW<
  [WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>],
  (instregex "VDUP(8|16|32)")
>;
def : InstRW<
  [SwiftWriteP2ThreeCycle],
  (instregex "VMOVRS$")
>;
def : InstRW<
  [WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP0TwoCycle]>],
  (instregex "VMOVSR$", "VSETLN")
>;
// VMOVRR(D|S) lists two writes.
def : InstRW<
  [SwiftWriteP2ThreeCycle, SwiftWriteP2FourCycle],
  (instregex "VMOVRR(D|S)$")
>;
def : InstRW<
  [SwiftWriteP2FourCycle],
  (instregex "VMOVDRR$")
>;
// VMOVSRR lists two sequences; the second starts with an extra
// SwiftWrite1Cycle.
def : InstRW<
  [
    WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>,
    WriteSequence<[SwiftWrite1Cycle, SwiftWriteP2FourCycle,
                   SwiftWriteP1TwoCycle]>
  ],
  (instregex "VMOVSRR$")
>;
def : InstRW<
  [WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle]>],
  (instregex "VGETLN(u|i)")
>;
// VGETLNs appends a SwiftWriteP01OneCycle to the VGETLN(u|i) sequence.
def : InstRW<
  [WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle,
                  SwiftWriteP01OneCycle]>],
  (instregex "VGETLNs")
>;
1669
// 4.2.38 Advanced SIMD and VFP, Move FPSCR
// Serializing instructions. Each wait write below has Latency 15 and lists
// 15 resource cycles on its unit (SwiftUnitP0, SwiftUnitP1 or SwiftUnitP2).
def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> {
  let ResourceCycles = [15];
  let Latency = 15;
}
def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> {
  let ResourceCycles = [15];
  let Latency = 15;
}
def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> {
  let ResourceCycles = [15];
  let Latency = 15;
}
// VMRS and VMSR both take all three wait writes.
def : InstRW<
  [SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
  (instregex "VMRS", "VMSR")
>;
// Not serializing.
def : InstRW<
  [SwiftWriteP0TwoCycle],
  (instregex "FMSTAT")
>;
1690
// 4.2.39 Advanced SIMD and VFP, Load Single Element
def : InstRW<
  [SwiftWriteLM4Cy],
  (instregex "VLDRD$", "VLDRS$")
>;

// 4.2.40 Advanced SIMD and VFP, Store Single Element
// Uses the same SwiftWriteLM4Cy write as the loads above.
def : InstRW<
  [SwiftWriteLM4Cy],
  (instregex "VSTRD$", "VSTRS$")
>;
1696
// 4.2.41 Advanced SIMD and VFP, Load Multiple
// 4.2.42 Advanced SIMD and VFP, Store Multiple

// Resource requirement for permuting, just reserves the resources:
// SwiftVLDMPerm1 .. SwiftVLDMPerm28 have zero latency and use Num micro-ops
// and Num resource cycles on SwiftUnitP1.
foreach Num = 1-28 in
  def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> {
    let NumMicroOps = Num;
    let ResourceCycles = [Num];
    let Latency = 0;
  }

// Pre RA pseudos - load/store to a Q register as a D register pair.
def : InstRW<
  [SwiftWriteLM4Cy],
  (instregex "VLDMQIA$", "VSTMQIA$")
>;
1711
// Post RA not modelled accurately. We assume that register use of width 64
// bit maps to a D register, 128 maps to a Q register. Not all different kinds
// are accurately represented.
//
// Each SchedVar pairs one SwiftLMAddr<N>Pred predicate with the write list
// used for it; the final NoSchedPred entry covers an unknown register count.
def SwiftWriteVLDM : SchedWriteVariant<[
  // Load of one S register.
  SchedVar<SwiftLMAddr1Pred,
           [SwiftWriteLM4Cy]>,
  // Load of one D register.
  SchedVar<SwiftLMAddr2Pred,
           [SwiftWriteLM4Cy, SwiftWriteLM4CyNo]>,
  // Load of 3 S registers.
  SchedVar<SwiftLMAddr3Pred,
           [SwiftWriteLM9Cy, SwiftWriteLM10Cy, SwiftWriteLM13CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm3]>,
  // Load of a Q register (not necessarily true). We should not be mapping to
  // 4 S registers, either.
  SchedVar<SwiftLMAddr4Pred,
           [SwiftWriteLM4Cy,
            SwiftWriteLM4CyNo, SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>,
  // Load of 5 S registers.
  SchedVar<SwiftLMAddr5Pred,
           [SwiftWriteLM9Cy, SwiftWriteLM10Cy, SwiftWriteLM13CyNo,
            SwiftWriteLM14CyNo, SwiftWriteLM17CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
  // Load of 3 D registers. (Must also be able to handle s register list -
  // though, not accurate)
  SchedVar<SwiftLMAddr6Pred,
           [SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM10Cy,
            SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
  // Load of 7 S registers.
  SchedVar<SwiftLMAddr7Pred,
           [SwiftWriteLM9Cy, SwiftWriteLM10Cy, SwiftWriteLM13Cy,
            SwiftWriteLM14CyNo, SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
            SwiftWriteLM21CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm7]>,
  // Load of two Q registers.
  SchedVar<SwiftLMAddr8Pred,
           [SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM13Cy,
            SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
            SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm2]>,
  // Load of 9 S registers.
  SchedVar<SwiftLMAddr9Pred,
           [SwiftWriteLM9Cy, SwiftWriteLM10Cy, SwiftWriteLM13Cy,
            SwiftWriteLM14CyNo, SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
            SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, SwiftWriteLM25CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm9]>,
  // Load of 5 D registers.
  SchedVar<SwiftLMAddr10Pred,
           [SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM10Cy,
            SwiftWriteLM14Cy,
            SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
            SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
  // Inaccurate: reuse description from 9 S registers.
  SchedVar<SwiftLMAddr11Pred,
           [SwiftWriteLM9Cy, SwiftWriteLM10Cy, SwiftWriteLM13Cy,
            SwiftWriteLM14CyNo, SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
            SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM25CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm9]>,
  // Load of three Q registers.
  SchedVar<SwiftLMAddr12Pred,
           [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
            SwiftWriteLM11Cy, SwiftWriteLM11Cy,
            SwiftWriteLM11CyNo, SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
            SwiftWriteLM11CyNo, SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
            SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm3]>,
  // Inaccurate: reuse description from 9 S registers.
  SchedVar<SwiftLMAddr13Pred,
           [SwiftWriteLM9Cy, SwiftWriteLM10Cy, SwiftWriteLM13Cy,
            SwiftWriteLM14CyNo, SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
            SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM25CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm9]>,
  // Load of 7 D registers inaccurate.
  SchedVar<SwiftLMAddr14Pred,
           [SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM10Cy,
            SwiftWriteLM14Cy, SwiftWriteLM14Cy,
            SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
            SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
            SwiftWriteLM14CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm7]>,
  SchedVar<SwiftLMAddr15Pred,
           [SwiftWriteLM9Cy, SwiftWriteLM10Cy, SwiftWriteLM13Cy,
            SwiftWriteLM14Cy, SwiftWriteLM17Cy, SwiftWriteLM18CyNo,
            SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM25CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm9]>,
  // Load of 4 Q registers.
  SchedVar<SwiftLMAddr16Pred,
           [SwiftWriteLM7Cy, SwiftWriteLM10Cy, SwiftWriteLM11Cy,
            SwiftWriteLM14Cy, SwiftWriteLM15Cy, SwiftWriteLM18CyNo,
            SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
            SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm4]>,
  // Unknown number of registers, just use resources for two registers.
  // (29 SwiftWriteLM13CyNo entries follow the three leading writes.)
  SchedVar<NoSchedPred,
           [SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM13Cy,
            SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
            SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
            SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
            SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
            SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
            SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
            SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
            SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
            SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
            SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
            SwiftWriteP01OneCycle, SwiftVLDMPerm2]>
]> {
  let Variadic = 1;
}

// VLDM without base-register update.
def : InstRW<
  [SwiftWriteVLDM],
  (instregex "VLDM[SD](IA|DB)$")
>;

// The _UPD forms list SwiftWriteP01OneCycle2x ahead of the variant.
def : InstRW<
  [SwiftWriteP01OneCycle2x, SwiftWriteVLDM],
  (instregex "VLDM[SD](IA|DB)_UPD$")
>;
1841
// Store-multiple counterpart of the VLDM variant: one SwiftWriteSTM<N> write
// per SwiftLMAddr<N>Pred predicate, with a NoSchedPred entry last.
def SwiftWriteVSTM : SchedWriteVariant<[
  // One S register.
  SchedVar<SwiftLMAddr1Pred,  [SwiftWriteSTM1]>,
  // One D register.
  SchedVar<SwiftLMAddr2Pred,  [SwiftWriteSTM1]>,
  // Three S registers.
  SchedVar<SwiftLMAddr3Pred,  [SwiftWriteSTM4]>,
  // Assume one Q register.
  SchedVar<SwiftLMAddr4Pred,  [SwiftWriteSTM1]>,
  SchedVar<SwiftLMAddr5Pred,  [SwiftWriteSTM6]>,
  // Assume three D registers.
  SchedVar<SwiftLMAddr6Pred,  [SwiftWriteSTM4]>,
  SchedVar<SwiftLMAddr7Pred,  [SwiftWriteSTM8]>,
  // Assume two Q registers.
  SchedVar<SwiftLMAddr8Pred,  [SwiftWriteSTM3]>,
  SchedVar<SwiftLMAddr9Pred,  [SwiftWriteSTM10]>,
  // Assume 5 D registers.
  SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>,
  SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>,
  // Assume three Q registers.
  SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>,
  SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>,
  // Assume 7 D registers.
  SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM8]>,
  SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM16]>,
  // Assume four Q registers.
  SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM5]>,
  // Assume two Q registers.
  SchedVar<NoSchedPred,       [SwiftWriteSTM3]>
]> {
  let Variadic = 1;
}

// VSTM without base-register update.
def : InstRW<
  [SwiftWriteVSTM],
  (instregex "VSTM[SD](IA|DB)$")
>;

// The _UPD forms list SwiftWriteP01OneCycle2x ahead of the variant.
def : InstRW<
  [SwiftWriteP01OneCycle2x, SwiftWriteVSTM],
  (instregex "VSTM[SD](IA|DB)_UPD")
>;
1877
// 4.2.43 Advanced SIMD, Element or Structure Load and Store
// Helper writes on SwiftUnitP2: latency 4 with 2 or 3 resource cycles.
def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
  let ResourceCycles = [2];
  let Latency = 4;
}
def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
  let ResourceCycles = [3];
  let Latency = 4;
}
// SwiftExt1xP0 / SwiftExt2xP0: zero-latency writes using Num micro-ops and
// Num resource cycles on SwiftUnitP0.
foreach Num = 1-2 in
  def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> {
    let NumMicroOps = Num;
    let ResourceCycles = [Num];
    let Latency = 0;
  }
// VLDx
// Multiple structures.
// Single element structure loads.
// We assume aligned.
// Throughout this group the writeback ("wb" / "_UPD") entries are the plain
// entries with a SwiftWriteP01OneCycle added.

// Single/two register.
def : InstRW<
  [SwiftWriteLM4Cy],
  (instregex "VLD1(d|q)(8|16|32|64)$")
>;
def : InstRW<
  [SwiftWriteLM4Cy, SwiftWriteP01OneCycle],
  (instregex "VLD1(d|q)(8|16|32|64)wb")
>;
// Three register.
def : InstRW<
  [SwiftWrite3xP2FourCy],
  (instregex "VLD1(d|q)(8|16|32|64)T$", "VLD1d64TPseudo")
>;
def : InstRW<
  [SwiftWrite3xP2FourCy, SwiftWriteP01OneCycle],
  (instregex "VLD1(d|q)(8|16|32|64)Twb")
>;
// Four register.
def : InstRW<
  [SwiftWrite2xP2FourCy],
  (instregex "VLD1(d|q)(8|16|32|64)Q$", "VLD1d64QPseudo")
>;
def : InstRW<
  [SwiftWrite2xP2FourCy, SwiftWriteP01OneCycle],
  (instregex "VLD1(d|q)(8|16|32|64)Qwb")
>;

// Two element structure loads.
// Two/four register.
def : InstRW<
  [SwiftWriteLM9Cy, SwiftExt2xP0, SwiftVLDMPerm2],
  (instregex "VLD2(d|q|b)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")
>;
def : InstRW<
  [SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftExt2xP0, SwiftVLDMPerm2],
  (instregex "VLD2(d|q|b)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")
>;

// Three element structure.
// The non-pseudo forms list three load writes, the pseudo forms one.
def : InstRW<
  [SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
   SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
  (instregex "VLD3(d|q)(8|16|32)$")
>;
def : InstRW<
  [SwiftWriteLM9Cy,
   SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
  (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")
>;
def : InstRW<
  [SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
   SwiftWriteP01OneCycle,
   SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
  (instregex "VLD3(d|q)(8|16|32)_UPD$")
>;
def : InstRW<
  [SwiftWriteLM9Cy,
   SwiftWriteP01OneCycle,
   SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
  (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")
>;

// Four element structure loads.
// The non-pseudo forms list four load writes, the pseudo forms one.
def : InstRW<
  [SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
   SwiftExt2xP0, SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
  (instregex "VLD4(d|q)(8|16|32)$")
>;
def : InstRW<
  [SwiftWriteLM11Cy,
   SwiftExt2xP0, SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
  (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")
>;
def : InstRW<
  [SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
   SwiftWriteP01OneCycle,
   SwiftExt2xP0, SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
  (instregex "VLD4(d|q)(8|16|32)_UPD")
>;
def : InstRW<
  [SwiftWriteLM11Cy,
   SwiftWriteP01OneCycle,
   SwiftExt2xP0, SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
  (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")
>;
1947
// Single all/lane loads.
// In the updating entries below, the position of SwiftWriteP01OneCycle within
// the write list differs between the non-pseudo and pseudo forms; the order is
// kept exactly as written.

// One element structure.
def : InstRW<
  [SwiftWriteLM6Cy, SwiftVLDMPerm2],
  (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$",
             "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")
>;
def : InstRW<
  [SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm2],
  (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
             "VLD1LNq(8|16|32)Pseudo_UPD")
>;

// Two element structure.
def : InstRW<
  [SwiftWriteLM6Cy, SwiftWriteLM6Cy,
   SwiftExt1xP0, SwiftVLDMPerm2],
  (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
             "VLD2LN(d|q)(8|16|32)Pseudo$")
>;
def : InstRW<
  [SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftWriteP01OneCycle,
   SwiftExt1xP0, SwiftVLDMPerm2],
  (instregex "VLD2LN(d|q)(8|16|32)_UPD$")
>;
// VLD2DUP writeback and the VLD2LN updating pseudos share one write list.
def : InstRW<
  [SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
   SwiftExt1xP0, SwiftVLDMPerm2],
  (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb",
             "VLD2LN(d|q)(8|16|32)Pseudo_UPD")
>;

// Three element structure.
def : InstRW<
  [SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy,
   SwiftExt1xP0, SwiftVLDMPerm3],
  (instregex "VLD3(DUP|LN)(d|q)(8|16|32)$",
             "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")
>;
def : InstRW<
  [SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, SwiftWriteP01OneCycle,
   SwiftExt1xP0, SwiftVLDMPerm3],
  (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")
>;
def : InstRW<
  [SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy, SwiftWriteLM8Cy,
   SwiftExt1xP0, SwiftVLDMPerm3],
  (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")
>;

// Four element structure.
def : InstRW<
  [SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo, SwiftWriteLM10CyNo,
   SwiftExt1xP0, SwiftVLDMPerm5],
  (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$",
             "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo$")
>;
def : InstRW<
  [SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo, SwiftWriteLM10CyNo,
   SwiftWriteP01OneCycle,
   SwiftExt1xP0, SwiftVLDMPerm5],
  (instregex "VLD4(DUP|LN)(d|q)(8|16|32)_UPD")
>;
def : InstRW<
  [SwiftWriteLM8Cy, SwiftWriteP01OneCycle,
   SwiftWriteLM9Cy, SwiftWriteLM10CyNo, SwiftWriteLM10CyNo,
   SwiftExt1xP0, SwiftVLDMPerm5],
  (instregex "VLD4(DUP|LN)(d|q)(8|16|32)Pseudo_UPD")
>;
// VSTx
// Multiple structures.
// Single element structure store.
// SwiftWrite<N>xP2 is chosen per form; every writeback form puts a
// SwiftWriteP01OneCycle in front of it.

// D-register and Q-register forms.
def : InstRW<
  [SwiftWrite1xP2],
  (instregex "VST1d(8|16|32|64)$")
>;
def : InstRW<
  [SwiftWrite2xP2],
  (instregex "VST1q(8|16|32|64)$")
>;
def : InstRW<
  [SwiftWriteP01OneCycle, SwiftWrite1xP2],
  (instregex "VST1d(8|16|32|64)wb")
>;
def : InstRW<
  [SwiftWriteP01OneCycle, SwiftWrite2xP2],
  (instregex "VST1q(8|16|32|64)wb")
>;
// "T" forms.
def : InstRW<
  [SwiftWrite3xP2],
  (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")
>;
def : InstRW<
  [SwiftWriteP01OneCycle, SwiftWrite3xP2],
  (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")
>;
// "Q" forms.
def : InstRW<
  [SwiftWrite4xP2],
  (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")
>;
def : InstRW<
  [SwiftWriteP01OneCycle, SwiftWrite4xP2],
  (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")
>;
// Two element structure store.
// Every writeback store in this section leads its write list with
// SwiftWriteP01OneCycle. The VST2q writeback entry previously omitted it, so
// the first listed write (SwiftWrite2xP2) took its place; it now follows the
// same shape as the VST2(b|d) writeback entry.
// NOTE(review): assumes the VST2q "wb"/"PseudoWB" instructions define the
// updated base register as their first output, like the other writeback
// stores - confirm against ARMInstrNEON.td.
def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
      (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
      (instregex "VST2(b|d)(8|16|32)wb")>;
def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
      (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
      (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
// Three element structure store.
def : InstRW<
  [SwiftWrite4xP2, SwiftVLDMPerm2],
  (instregex "VST3(d|q)(8|16|32)$",
             "VST3(d|q)(8|16|32)(oddP|P)seudo$")
>;
def : InstRW<
  [SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
  (instregex "VST3(d|q)(8|16|32)_UPD",
             "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")
>;

// Four element structure store.
// NOTE(review): the non-updating entry uses SwiftVLDMPerm2 while the _UPD
// entry uses SwiftVLDMPerm4; kept as written - confirm which is intended.
def : InstRW<
  [SwiftWrite4xP2, SwiftVLDMPerm2],
  (instregex "VST4(d|q)(8|16|32)$",
             "VST4(d|q)(8|16|32)(oddP|P)seudo$")
>;
def : InstRW<
  [SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm4],
  (instregex "VST4(d|q)(8|16|32)_UPD",
             "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")
>;
// Single/all lane store.
// Each _UPD entry is the plain entry with SwiftWriteP01OneCycle in front.

// One element structure.
def : InstRW<
  [SwiftWrite1xP2, SwiftVLDMPerm1],
  (instregex "VST1LNd(8|16|32)$",
             "VST1LNq(8|16|32)Pseudo$")
>;
def : InstRW<
  [SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
  (instregex "VST1LNd(8|16|32)_UPD",
             "VST1LNq(8|16|32)Pseudo_UPD")
>;

// Two element structure.
def : InstRW<
  [SwiftWrite1xP2, SwiftVLDMPerm2],
  (instregex "VST2LN(d|q)(8|16|32)$",
             "VST2LN(d|q)(8|16|32)Pseudo$")
>;
def : InstRW<
  [SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm2],
  (instregex "VST2LN(d|q)(8|16|32)_UPD",
             "VST2LN(d|q)(8|16|32)Pseudo_UPD")
>;

// Three element structure.
def : InstRW<
  [SwiftWrite4xP2, SwiftVLDMPerm2],
  (instregex "VST3LN(d|q)(8|16|32)$",
             "VST3LN(d|q)(8|16|32)Pseudo$")
>;
def : InstRW<
  [SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
  (instregex "VST3LN(d|q)(8|16|32)_UPD",
             "VST3LN(d|q)(8|16|32)Pseudo_UPD")
>;

// Four element structure.
def : InstRW<
  [SwiftWrite2xP2, SwiftVLDMPerm2],
  (instregex "VST4LN(d|q)(8|16|32)$",
             "VST4LN(d|q)(8|16|32)Pseudo$")
>;
def : InstRW<
  [SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
  (instregex "VST4LN(d|q)(8|16|32)_UPD",
             "VST4LN(d|q)(8|16|32)Pseudo_UPD")
>;
2054
// 4.2.44 VFP, Divide and Square Root
// Both writes are a single micro-op using SwiftUnitP0 for 1 resource cycle
// and SwiftUnitDiv for 15 (SwiftDiv17) or 30 (SwiftDiv32) resource cycles.
def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
  let Latency = 17;
  let ResourceCycles = [1, 15];
  let NumMicroOps = 1;
}
def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
  let Latency = 32;
  let ResourceCycles = [1, 30];
  let NumMicroOps = 1;
}
// The "S" divide/sqrt patterns use SwiftDiv17, the "D" patterns SwiftDiv32.
def : InstRW<
  [SwiftDiv17],
  (instregex "VDIVS", "VSQRTS")
>;
def : InstRW<
  [SwiftDiv32],
  (instregex "VDIVD", "VSQRTD")
>;
2068
// Not specified.
def : InstRW<
  [SwiftWriteP01OneCycle2x],
  (instregex "ABS")
>;
// Preload: WritePreLd is mapped to SwiftUnitP2 with zero latency and zero
// resource cycles.
def : WriteRes<WritePreLd, [SwiftUnitP2]> {
  let ResourceCycles = [0];
  let Latency = 0;
}
2075
Arnold Schwaighofer6793aeb2013-04-01 13:07:05 +00002076}