//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines resources required by some of the P9 instructions. This is
// part of the P9 processor model used for instruction scheduling. Not every
// instruction is listed here. Instructions in this file belong to itinerary
// classes that have instructions with different resource requirements.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//     for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "VADDU(B|H|W|D)M$"),
    VADDCUW,
    VAND,
    VANDC,
    VCMPEQUB,
    VCMPEQUD,
    VCMPEQUH,
    VCMPEQUW,
    VCMPNEB,
    VCMPNEH,
    VCMPNEW,
    VCMPNEZB,
    VCMPNEZH,
    VCMPNEZW,
    VEQV,
    VEXTSB2D,
    VEXTSB2W,
    VEXTSH2D,
    VEXTSH2W,
    VEXTSW2D,
    VRLB,
    VRLD,
    VRLDMI,
    VRLDNM,
    VRLH,
    VRLW,
    VRLWMI,
    VRLWNM,
    VSRAB,
    VSRAD,
    VSRAH,
    VSRAW,
    VSRB,
    VSRD,
    VSRH,
    VSRW,
    VSLB,
    VSLD,
    VSLH,
    VSLW,
    VMRGEW,
    VMRGOW,
    VNAND,
    VNEGD,
    VNEGW,
    VNOR,
    VOR,
    VORC,
    VPOPCNTB,
    VPOPCNTH,
    VSEL,
    VSUBUBM,
    VSUBUDM,
    VSUBUHM,
    VSUBUWM,
    VXOR,
    V_SET0B,
    V_SET0H,
    V_SET0,
    XVABSDP,
    XVABSSP,
    XVCPSGNDP,
    XVCPSGNSP,
    XVIEXPDP,
    XVNABSDP,
    XVNABSSP,
    XVNEGDP,
    XVNEGSP,
    XVXEXPDP,
    XVIEXPSP,
    XVXEXPSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
    XXLNAND,
    XXLNOR,
    XXLOR,
    XXLORf,
    XXLORC,
    XXLXOR,
    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
    XSXEXPQP
)>;

// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
// single slice. However, since it is Restricted it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FCMPUS,
    FCMPUD,
    XSTSTDCDP,
    XSTSTDCSP,
    FTDIV,
    FTSQRT,
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$")
)>;

// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    XSTDIVDP,
    XSTSQRTDP,
    XSXSIGDP,
    XSCVSPDPN,
    SETB,
    BPERMD,
    (instregex "CNT(L|T)Z(D|W)(8)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$")
)>;

// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    ADDIStocHA,
    ADDItocL,
    MCRF,
    MCRXRX,
    XSNABSDP,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP,
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI$"),
    SRADI_32,
    RLDIC,
    ADDIC,
    ADDICo,
    LA,
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "SUBF(I)?C(8)?$"),
    (instregex "ANDI(S)?o(8)?$"),
    (instregex "ADD(I)?C(8)?(o)?$"),
    (instregex "ADD(E|ME|ZE)(8)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?$"),
    (instregex "NEG(8)?$"),
    (instregex "POPCNTB$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?$"),
    NOP,
    (instregex "NAND(8)?$"),
    (instregex "AND(C)?(8)?$"),
    (instregex "NOR(8)?$"),
    (instregex "OR(C)?(8)?$"),
    (instregex "EQV(8)?$"),
    (instregex "EXTS(B|H)(8)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$")
)>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    RLDCL,
    RLDCR,
    RLDIMI,
    RLDICL,
    RLDICR,
    RLDICL_32_64,
    RLDICL_32,
    RLDICR_32,
    (instregex "RLWIMI(8)?$"),
    XSIEXPDP,
    FMR,
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$")
)>;

// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VBPERMD,
    VABSDUB,
    VABSDUH,
    VABSDUW,
    VADDUBS,
    VADDUHS,
    VADDUWS,
    VAVGSB,
    VAVGSH,
    VAVGSW,
    VAVGUB,
    VAVGUH,
    VAVGUW,
    VCMPEQFP,
    VCMPEQFPo,
    VCMPGEFP,
    VCMPGEFPo,
    VCMPBFP,
    VCMPBFPo,
    VCMPGTFP,
    VCMPGTFPo,
    VCLZB,
    VCLZD,
    VCLZH,
    VCLZW,
    VCTZB,
    VCTZD,
    VCTZH,
    VCTZW,
    VADDSBS,
    VADDSHS,
    VADDSWS,
    VMINFP,
    VMINSB,
    VMINSD,
    VMINSH,
    VMINSW,
    VMINUB,
    VMINUD,
    VMINUH,
    VMINUW,
    VMAXFP,
    VMAXSB,
    VMAXSD,
    VMAXSH,
    VMAXSW,
    VMAXUB,
    VMAXUD,
    VMAXUH,
    VMAXUW,
    VPOPCNTW,
    VPOPCNTD,
    VPRTYBD,
    VPRTYBW,
    VSHASIGMAD,
    VSHASIGMAW,
    VSUBSBS,
    VSUBSHS,
    VSUBSWS,
    VSUBUBS,
    VSUBUHS,
    VSUBUWS,
    VSUBCUW,
    VCMPGTSB,
    VCMPGTSBo,
    VCMPGTSD,
    VCMPGTSDo,
    VCMPGTSH,
    VCMPGTSHo,
    VCMPGTSW,
    VCMPGTSWo,
    VCMPGTUB,
    VCMPGTUBo,
    VCMPGTUD,
    VCMPGTUDo,
    VCMPGTUH,
    VCMPGTUHo,
    VCMPGTUW,
    VCMPGTUWo,
    VCMPNEBo,
    VCMPNEHo,
    VCMPNEWo,
    VCMPNEZBo,
    VCMPNEZHo,
    VCMPNEZWo,
    VCMPEQUBo,
    VCMPEQUDo,
    VCMPEQUHo,
    VCMPEQUWo,
    XVCMPEQDP,
    XVCMPEQDPo,
    XVCMPEQSP,
    XVCMPEQSPo,
    XVCMPGEDP,
    XVCMPGEDPo,
    XVCMPGESP,
    XVCMPGESPo,
    XVCMPGTDP,
    XVCMPGTDPo,
    XVCMPGTSP,
    XVCMPGTSPo,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
    XVMINSP,
    XVTDIVDP,
    XVTDIVSP,
    XVTSQRTDP,
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
    XVXSIGDP,
    XVXSIGSP
)>;

// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines
// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VADDFP,
    VCTSXS,
    VCTSXS_0,
    VCTUXS,
    VCTUXS_0,
    VEXPTEFP,
    VLOGEFP,
    VMADDFP,
    VMHADDSHS,
    VNMSUBFP,
    VREFP,
    VRFIM,
    VRFIN,
    VRFIP,
    VRFIZ,
    VRSQRTEFP,
    VSUBFP,
    XVADDDP,
    XVADDSP,
    XVCVDPSP,
    XVCVDPSXDS,
    XVCVDPSXWS,
    XVCVDPUXDS,
    XVCVDPUXWS,
    XVCVHPSP,
    XVCVSPDP,
    XVCVSPHP,
    XVCVSPSXDS,
    XVCVSPSXWS,
    XVCVSPUXDS,
    XVCVSPUXWS,
    XVCVSXDDP,
    XVCVSXDSP,
    XVCVSXWDP,
    XVCVSXWSP,
    XVCVUXDDP,
    XVCVUXDSP,
    XVCVUXWDP,
    XVCVUXWSP,
    XVMADDADP,
    XVMADDASP,
    XVMADDMDP,
    XVMADDMSP,
    XVMSUBADP,
    XVMSUBASP,
    XVMSUBMDP,
    XVMSUBMSP,
    XVMULDP,
    XVMULSP,
    XVNMADDADP,
    XVNMADDASP,
    XVNMADDMDP,
    XVNMADDMSP,
    XVNMSUBADP,
    XVNMSUBASP,
    XVNMSUBMDP,
    XVNMSUBMSP,
    XVRDPI,
    XVRDPIC,
    XVRDPIM,
    XVRDPIP,
    XVRDPIZ,
    XVREDP,
    XVRESP,
    XVRSPI,
    XVRSPIC,
    XVRSPIM,
    XVRSPIP,
    XVRSPIZ,
    XVRSQRTEDP,
    XVRSQRTESP,
    XVSUBDP,
    XVSUBSP,
    VCFSX,
    VCFSX_0,
    VCFUX,
    VCFUX_0,
    VMHRADDSHS,
    VMLADDUHM,
    VMSUMMBM,
    VMSUMSHM,
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
    VMULESW,
    VMULEUB,
    VMULEUH,
    VMULEUW,
    VMULOSB,
    VMULOSH,
    VMULOSW,
    VMULOUB,
    VMULOUH,
    VMULOUW,
    VMULUWM,
    VSUM2SWS,
    VSUM4SBS,
    VSUM4SHS,
    VSUM4UBS,
    VSUMSWS
)>;

// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FRSP,
    FRIND,
    FRINS,
    FRIPD,
    FRIPS,
    FRIZD,
    FRIZS,
    FRIMD,
    FRIMS,
    FRE,
    FRES,
    FRSQRTE,
    FRSQRTES,
    FMADDS,
    FMADD,
    FMSUBS,
    FMSUB,
    FNMADDS,
    FNMADD,
    FNMSUBS,
    FNMSUB,
    FSELD,
    FSELS,
    FADDS,
    FMULS,
    FMUL,
    FSUBS,
    FCFID,
    FCTID,
    FCTIDZ,
    FCFIDU,
    FCFIDS,
    FCFIDUS,
    FCTIDUZ,
    FCTIWUZ,
    FCTIW,
    FCTIWZ,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
    XSMADDMSP,
    XSMSUBADP,
    XSMSUBASP,
    XSMSUBMDP,
    XSMSUBMSP,
    XSMULDP,
    XSMULSP,
    XSNMADDADP,
    XSNMADDASP,
    XSNMADDMDP,
    XSNMADDMSP,
    XSNMSUBADP,
    XSNMSUBASP,
    XSNMSUBMDP,
    XSNMSUBMSP
)>;

// 7 cycle Restricted DP operation and one 2 cycle ALU operation.
// The DP is restricted so we need a full 5 dispatches.
def : InstRW<[P9_DPOpAndALUOp_9C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FMULo,
    FMADDo,
    FMSUBo,
    FNMADDo,
    FNMSUBo
)>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    XSADDDP,
    XSADDSP,
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVHPDP,
    XSCVSPDP,
    XSCVSXDDP,
    XSCVSXDSP,
    XSCVUXDDP,
    XSCVUXDSP,
    XSRDPI,
    XSRDPIC,
    XSRDPIM,
    XSRDPIP,
    XSRDPIZ,
    XSREDP,
    XSRESP,
    //XSRSP,
    XSRSQRTEDP,
    XSRSQRTESP,
    XSSUBDP,
    XSSUBSP,
    XSCVDPSPN
)>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VBPERMQ,
    VCLZLSBB,
    VCTZLSBB,
    VEXTRACTD,
    VEXTRACTUB,
    VEXTRACTUH,
    VEXTRACTUW,
    VEXTUBLX,
    VEXTUBRX,
    VEXTUHLX,
    VEXTUHRX,
    VEXTUWLX,
    VEXTUWRX,
    VGBBD,
    VINSERTB,
    VINSERTD,
    VINSERTH,
    VINSERTW,
    VMRGHB,
    VMRGHH,
    VMRGHW,
    VMRGLB,
    VMRGLH,
    VMRGLW,
    VPERM,
    VPERMR,
    VPERMXOR,
    VPKPX,
    VPKSDSS,
    VPKSDUS,
    VPKSHSS,
    VPKSHUS,
    VPKSWSS,
    VPKSWUS,
    VPKUDUM,
    VPKUDUS,
    VPKUHUM,
    VPKUHUS,
    VPKUWUM,
    VPKUWUS,
    VPRTYBQ,
    VSL,
    VSLDOI,
    VSLO,
    VSLV,
    VSPLTB,
    VSPLTBs,
    VSPLTH,
    VSPLTHs,
    VSPLTISB,
    VSPLTISH,
    VSPLTISW,
    VSPLTW,
    VSR,
    VSRO,
    VSRV,
    VUPKHPX,
    VUPKHSB,
    VUPKHSH,
    VUPKHSW,
    VUPKLPX,
    VUPKLSB,
    VUPKLSH,
    VUPKLSW,
    XXBRD,
    XXBRH,
    XXBRQ,
    XXBRW,
    XXEXTRACTUW,
    XXINSERTW,
    XXMRGHW,
    XXMRGLW,
    XXPERM,
    XXPERMR,
    XXSLDWI,
    XXSPLTIB,
    XXSPLTW,
    XXSPLTWs,
    XXPERMDI,
    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
    VADDUQM,
    VMUL10CUQ,
    VMUL10ECUQ,
    VMUL10EUQ,
    VMUL10UQ,
    VSUBCUQ,
    VSUBECUQ,
    VSUBEUQM,
    VSUBUQM,
    XSCMPEXPQP,
    XSCMPOQP,
    XSCMPUQP,
    XSTSTDCQP,
    XSXSIGQP
)>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSADDQP,
    XSADDQPO,
    XSCVDPQP,
    XSCVQPDP,
    XSCVQPDPO,
    XSCVQPSDZ,
    XSCVQPSWZ,
    XSCVQPUDZ,
    XSCVQPUWZ,
    XSCVSDQP,
    XSCVUDQP,
    XSRQPI,
    XSRQPXP,
    XSSUBQP,
    XSSUBQPO
)>;

// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSMADDQP,
    XSMADDQPO,
    XSMSUBQP,
    XSMSUBQPO,
    XSMULQP,
    XSMULQPO,
    XSNMADDQP,
    XSNMADDQPO,
    XSNMSUBQP,
    XSNMSUBQPO
)>;

// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSDIVQP,
    XSDIVQPO
)>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSSQRTQP,
    XSSQRTQPO
)>;

// 5 Cycle load uses a single slice.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
      (instrs
    LXSDX,
    LXVD2X,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64,
    LIWZX
)>;

// 4 Cycle load uses a single slice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
      (instrs
    COPY
)>;

// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
// superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFIWZX,
    LFDX,
    LFD
)>;

// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFIWAX,
    LFSX,
    LFS
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 4 dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LXSSPX,
    LXSIWAX,
    LXSSP,
    DFLOADf32,
    XFLOADf32,
    LIWAX
)>;

// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles.
// Since the PM requires the full superslice we need both EXECE, EXECO pipelines
// as well as 3 dispatches for the PM. The Load requires the remaining 2
// dispatches.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LXVDSX,
    LXVWSX,
    LXVW4X
)>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(DX|SPX|IWX)$"),
    DFSTOREf32,
    DFSTOREf64,
    XFSTOREf32,
    XFSTOREf64,
    STIWX
)>;

// Store operation that requires the whole superslice.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    STXVD2X,
    STXVW4X
)>;

// Cracked instruction made up of two restricted stores.
def : InstRW<[P9_LS_1C, P9_LS_1C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
              IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    STFDEPX
)>;

// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "M(T|F)VRSAVE(v)?$")
)>;

// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVW,
    DIVWU,
    MODSW
)>;

// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVWE,
    DIVD,
    DIVWEU,
    DIVDU,
    MODSD,
    MODUD,
    MODUW
)>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVDE,
    DIVDEU
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV
// per superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVDo,
    DIVDUo,
    DIVWEo,
    DIVWEUo
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV
// per superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVDEo,
    DIVDEUo
)>;

// CR access instructions in IIC_BrMCR, IIC_BrMCRX.

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    MTOCRF,
    MTOCRF8,
    MTCRF,
    MTCRF8
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 4 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    MCRFS
)>;

// Record form rotate that is modeled with the combined ALU-and-ALU resource.
// Uses two EXEC pipelines and 5 dispatches.
// NOTE(review): the resource name suggests two dependent ALU ops totaling
// 4 cycles, with one of them restricted (3 + 2 dispatches) - confirm against
// the definition of P9_ALUOpAndALUOp_4C.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    RLWINMo
)>;

// FP Div instructions in IIC_FPDivD and IIC_FPDivS.

// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FDIV
)>;

// 33 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
def : InstRW<[P9_DPOpAndALUOp_35C_8, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FDIVo
)>;

// 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    XSDIVDP
)>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FDIVS
)>;

// 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
def : InstRW<[P9_DPOpAndALUOp_24C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FDIVSo
)>;

// 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    XSDIVSP
)>;

// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XVDIVSP
)>;

// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XVDIVDP
)>;

// Load instructions in IIC_LdStLFDU and IIC_LdStLFDUX.

// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 6 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "LF(SU|SUX)$")
)>;

// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
// the store and so it can be run at the same time as the store. The store is
// also restricted.
def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "STF(SU|SUX|DU|DUX)$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted. 3 dispatches are from the restricted load while the other two
// are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
// is required for the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "LF(DU|DUX)$")
)>;

// Crypto Instructions

// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VPMSUMB,
    VPMSUMD,
    VPMSUMH,
    VPMSUMW,
    VCIPHER,
    VCIPHERLAST,
    VNCIPHER,
    VNCIPHERLAST,
    VSBOX
)>;

// Instructions without scheduling support. No resources are listed and the
// record is flagged Unsupported.
def : InstRW<[],
      (instrs
    (instregex "(H)?RFI(D)?$")
)> { let Unsupported = 1; }