//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by some P9 instructions. It is part
// of the P9 processor model used for instruction scheduling. Not every
// instruction is listed here. Instructions in this file belong to itinerary
// classes that have instructions with different resource requirements.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//     for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VADDCUW,
    VADDUBM,
    VADDUDM,
    VADDUHM,
    VADDUWM,
    VAND,
    VANDC,
    VCMPEQUB,
    VCMPEQUD,
    VCMPEQUH,
    VCMPEQUW,
    VCMPNEB,
    VCMPNEH,
    VCMPNEW,
    VCMPNEZB,
    VCMPNEZH,
    VCMPNEZW,
    VEQV,
    VEXTSB2D,
    VEXTSB2W,
    VEXTSH2D,
    VEXTSH2W,
    VEXTSW2D,
    VRLB,
    VRLD,
    VRLDMI,
    VRLDNM,
    VRLH,
    VRLW,
    VRLWMI,
    VRLWNM,
    VSRAB,
    VSRAD,
    VSRAH,
    VSRAW,
    VSRB,
    VSRD,
    VSRH,
    VSRW,
    VSLB,
    VSLD,
    VSLH,
    VSLW,
    VMRGEW,
    VMRGOW,
    VNAND,
    VNEGD,
    VNEGW,
    VNOR,
    VOR,
    VORC,
    VPOPCNTB,
    VPOPCNTH,
    VSEL,
    VSUBUBM,
    VSUBUDM,
    VSUBUHM,
    VSUBUWM,
    VXOR,
    V_SET0B,
    V_SET0H,
    V_SET0,
    XVABSDP,
    XVABSSP,
    XVCPSGNDP,
    XVCPSGNSP,
    XVIEXPDP,
    XVNABSDP,
    XVNABSSP,
    XVNEGDP,
    XVNEGSP,
    XVXEXPDP,
    XVIEXPSP,
    XVXEXPSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
    XXLNAND,
    XXLNOR,
    XXLOR,
    XXLORf,
    XXLORC,
    XXLXOR,
    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
    XSXEXPQP
)>;

// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
// single slice. However, since it is Restricted it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FCMPUS,
    FCMPUD,
    XSTSTDCDP,
    XSTSTDCSP
)>;

// Standard Dispatch ALU operation for 3 cycles. Only one slice used, so the
// record lists one EXEC pipeline and two dispatches.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    XSMAXCDP,
    XSMAXDP,
    XSMAXJDP,
    XSMINCDP,
    XSMINDP,
    XSMINJDP,
    XSTDIVDP,
    XSTSQRTDP,
    XSCMPEQDP,
    XSCMPEXPDP,
    XSCMPGEDP,
    XSCMPGTDP,
    XSCMPODP,
    XSCMPUDP,
    XSXSIGDP,
    XSCVSPDPN
)>;

// Standard Dispatch ALU operation for 2 cycles. Only one slice used, so the
// record lists one EXEC pipeline and two dispatches.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    ADDIStocHA,
    ADDItocL,
    MCRF,
    MCRXRX,
    SLD,
    SRD,
    SRAD,
    SRADI,
    RLDIC,
    XSNABSDP,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP
)>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    RLDCL,
    RLDCR,
    RLDIMI,
    RLDICL,
    RLDICR,
    RLDICL_32_64,
    XSIEXPDP,
    FMR,
    FABSD,
    FABSS,
    FNABSD,
    FNABSS,
    FNEGD,
    FNEGS,
    FCPSGND,
    FCPSGNS
)>;

// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VBPERMD,
    VABSDUB,
    VABSDUH,
    VABSDUW,
    VADDUBS,
    VADDUHS,
    VADDUWS,
    VAVGSB,
    VAVGSH,
    VAVGSW,
    VAVGUB,
    VAVGUH,
    VAVGUW,
    VCMPEQFP,
    VCMPEQFPo,
    VCMPGEFP,
    VCMPGEFPo,
    VCMPBFP,
    VCMPBFPo,
    VCMPGTFP,
    VCMPGTFPo,
    VCLZB,
    VCLZD,
    VCLZH,
    VCLZW,
    VCTZB,
    VCTZD,
    VCTZH,
    VCTZW,
    VADDSBS,
    VADDSHS,
    VADDSWS,
    VMINFP,
    VMINSB,
    VMINSD,
    VMINSH,
    VMINSW,
    VMINUB,
    VMINUD,
    VMINUH,
    VMINUW,
    VMAXFP,
    VMAXSB,
    VMAXSD,
    VMAXSH,
    VMAXSW,
    VMAXUB,
    VMAXUD,
    VMAXUH,
    VMAXUW,
    VPOPCNTW,
    VPOPCNTD,
    VPRTYBD,
    VPRTYBW,
    VSHASIGMAD,
    VSHASIGMAW,
    VSUBSBS,
    VSUBSHS,
    VSUBSWS,
    VSUBUBS,
    VSUBUHS,
    VSUBUWS,
    VSUBCUW,
    VCMPGTSB,
    VCMPGTSBo,
    VCMPGTSD,
    VCMPGTSDo,
    VCMPGTSH,
    VCMPGTSHo,
    VCMPGTSW,
    VCMPGTSWo,
    VCMPGTUB,
    VCMPGTUBo,
    VCMPGTUD,
    VCMPGTUDo,
    VCMPGTUH,
    VCMPGTUHo,
    VCMPGTUW,
    VCMPGTUWo,
    VCMPNEBo,
    VCMPNEHo,
    VCMPNEWo,
    VCMPNEZBo,
    VCMPNEZHo,
    VCMPNEZWo,
    VCMPEQUBo,
    VCMPEQUDo,
    VCMPEQUHo,
    VCMPEQUWo,
    XVCMPEQDP,
    XVCMPEQDPo,
    XVCMPEQSP,
    XVCMPEQSPo,
    XVCMPGEDP,
    XVCMPGEDPo,
    XVCMPGESP,
    XVCMPGESPo,
    XVCMPGTDP,
    XVCMPGTDPo,
    XVCMPGTSP,
    XVCMPGTSPo,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
    XVMINSP,
    XVTDIVDP,
    XVTDIVSP,
    XVTSQRTDP,
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
    XVXSIGDP,
    XVXSIGSP
)>;

// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines
// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VADDFP,
    VCTSXS,
    VCTSXS_0,
    VCTUXS,
    VCTUXS_0,
    VEXPTEFP,
    VLOGEFP,
    VMADDFP,
    VMHADDSHS,
    VNMSUBFP,
    VREFP,
    VRFIM,
    VRFIN,
    VRFIP,
    VRFIZ,
    VRSQRTEFP,
    VSUBFP,
    XVADDDP,
    XVADDSP,
    XVCVDPSP,
    XVCVDPSXDS,
    XVCVDPSXWS,
    XVCVDPUXDS,
    XVCVDPUXWS,
    XVCVHPSP,
    XVCVSPDP,
    XVCVSPHP,
    XVCVSPSXDS,
    XVCVSPSXWS,
    XVCVSPUXDS,
    XVCVSPUXWS,
    XVCVSXDDP,
    XVCVSXDSP,
    XVCVSXWDP,
    XVCVSXWSP,
    XVCVUXDDP,
    XVCVUXDSP,
    XVCVUXWDP,
    XVCVUXWSP,
    XVMADDADP,
    XVMADDASP,
    XVMADDMDP,
    XVMADDMSP,
    XVMSUBADP,
    XVMSUBASP,
    XVMSUBMDP,
    XVMSUBMSP,
    XVMULDP,
    XVMULSP,
    XVNMADDADP,
    XVNMADDASP,
    XVNMADDMDP,
    XVNMADDMSP,
    XVNMSUBADP,
    XVNMSUBASP,
    XVNMSUBMDP,
    XVNMSUBMSP,
    XVRDPI,
    XVRDPIC,
    XVRDPIM,
    XVRDPIP,
    XVRDPIZ,
    XVREDP,
    XVRESP,
    XVRSPI,
    XVRSPIC,
    XVRSPIM,
    XVRSPIP,
    XVRSPIZ,
    XVRSQRTEDP,
    XVRSQRTESP,
    XVSUBDP,
    XVSUBSP,
    VCFSX,
    VCFSX_0,
    VCFUX,
    VCFUX_0,
    VMHRADDSHS,
    VMLADDUHM,
    VMSUMMBM,
    VMSUMSHM,
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
    VMULESW,
    VMULEUB,
    VMULEUH,
    VMULEUW,
    VMULOSB,
    VMULOSH,
    VMULOSW,
    VMULOUB,
    VMULOUH,
    VMULOUW,
    VMULUWM,
    VSUM2SWS,
    VSUM4SBS,
    VSUM4SHS,
    VSUM4UBS,
    VSUMSWS
)>;

// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FRSP,
    FRIND,
    FRINS,
    FRIPD,
    FRIPS,
    FRIZD,
    FRIZS,
    FRIMD,
    FRIMS,
    FRE,
    FRES,
    FRSQRTE,
    FRSQRTES,
    FMADDS,
    FMADD,
    FMSUBS,
    FMSUB,
    FNMADDS,
    FNMADD,
    FNMSUBS,
    FNMSUB,
    FSELD,
    FSELS,
    FADDS,
    FMULS,
    FMUL,
    FSUBS,
    FCFID,
    FCTID,
    FCTIDZ,
    FCFIDU,
    FCFIDS,
    FCFIDUS,
    FCTIDUZ,
    FCTIWUZ,
    FCTIW,
    FCTIWZ,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
    XSMADDMSP,
    XSMSUBADP,
    XSMSUBASP,
    XSMSUBMDP,
    XSMSUBMSP,
    XSMULDP,
    XSMULSP,
    XSNMADDADP,
    XSNMADDASP,
    XSNMADDMDP,
    XSNMADDMSP,
    XSNMSUBADP,
    XSNMSUBASP,
    XSNMSUBMDP,
    XSNMSUBMSP
)>;

// 7 cycle Restricted DP operation and one 2 cycle ALU operation.
// The DP is restricted so we need a full 5 dispatches.
// Two EXEC pipelines are listed, one for each of the two operations.
def : InstRW<[P9_DPOpAndALUOp_9C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FMULo,
    FMADDo,
    FMSUBo,
    FNMADDo,
    FNMSUBo
)>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
// Note: XSRSP is deliberately left commented out in the list below.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    XSADDDP,
    XSADDSP,
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVHPDP,
    XSCVSPDP,
    XSCVSXDDP,
    XSCVSXDSP,
    XSCVUXDDP,
    XSCVUXDSP,
    XSRDPI,
    XSRDPIC,
    XSRDPIM,
    XSRDPIP,
    XSRDPIZ,
    XSREDP,
    XSRESP,
    //XSRSP,
    XSRSQRTEDP,
    XSRSQRTESP,
    XSSUBDP,
    XSSUBSP,
    XSCVDPSPN
)>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VBPERMQ,
    VCLZLSBB,
    VCTZLSBB,
    VEXTRACTD,
    VEXTRACTUB,
    VEXTRACTUH,
    VEXTRACTUW,
    VEXTUBLX,
    VEXTUBRX,
    VEXTUHLX,
    VEXTUHRX,
    VEXTUWLX,
    VEXTUWRX,
    VGBBD,
    VINSERTB,
    VINSERTD,
    VINSERTH,
    VINSERTW,
    VMRGHB,
    VMRGHH,
    VMRGHW,
    VMRGLB,
    VMRGLH,
    VMRGLW,
    VPERM,
    VPERMR,
    VPERMXOR,
    VPKPX,
    VPKSDSS,
    VPKSDUS,
    VPKSHSS,
    VPKSHUS,
    VPKSWSS,
    VPKSWUS,
    VPKUDUM,
    VPKUDUS,
    VPKUHUM,
    VPKUHUS,
    VPKUWUM,
    VPKUWUS,
    VPRTYBQ,
    VSL,
    VSLDOI,
    VSLO,
    VSLV,
    VSPLTB,
    VSPLTBs,
    VSPLTH,
    VSPLTHs,
    VSPLTISB,
    VSPLTISH,
    VSPLTISW,
    VSPLTW,
    VSR,
    VSRO,
    VSRV,
    VUPKHPX,
    VUPKHSB,
    VUPKHSH,
    VUPKHSW,
    VUPKLPX,
    VUPKLSB,
    VUPKLSH,
    VUPKLSW,
    XXBRD,
    XXBRH,
    XXBRQ,
    XXBRW,
    XXEXTRACTUW,
    XXINSERTW,
    XXMRGHW,
    XXMRGLW,
    XXPERM,
    XXPERMR,
    XXSLDWI,
    XXSPLTIB,
    XXSPLTW,
    XXSPLTWs,
    XXPERMDI,
    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
    VADDUQM,
    VMUL10CUQ,
    VMUL10ECUQ,
    VMUL10EUQ,
    VMUL10UQ,
    VSUBCUQ,
    VSUBECUQ,
    VSUBEUQM,
    VSUBUQM,
    XSCMPEXPQP,
    XSCMPOQP,
    XSCMPUQP,
    XSTSTDCQP,
    XSXSIGQP
)>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSADDQP,
    XSADDQPO,
    XSCVDPQP,
    XSCVQPDP,
    XSCVQPDPO,
    XSCVQPSDZ,
    XSCVQPSWZ,
    XSCVQPUDZ,
    XSCVQPUWZ,
    XSCVSDQP,
    XSCVUDQP,
    XSRQPI,
    XSRQPXP,
    XSSUBQP,
    XSSUBQPO
)>;

// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSMADDQP,
    XSMADDQPO,
    XSMSUBQP,
    XSMSUBQPO,
    XSMULQP,
    XSMULQPO,
    XSNMADDQP,
    XSNMADDQPO,
    XSNMSUBQP,
    XSNMSUBQPO
)>;

// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSDIVQP,
    XSDIVQPO
)>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSSQRTQP,
    XSSQRTQPO
)>;

// 5 Cycle load uses a single slice: one AGEN pipeline and two dispatches.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
      (instrs
    LXSDX,
    LXVD2X,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64
)>;

// 4 Cycle load uses a single slice: one AGEN pipeline and two dispatches.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
      (instrs
    COPY
)>;

// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
// superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFIWZX,
    LFDX,
    LFD
)>;

// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFIWAX,
    LFSX,
    LFS
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 4 dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LXSSPX,
    LXSIWAX,
    LXSSP,
    DFLOADf32,
    XFLOADf32,
    LIWAX,
    LIWZX
)>;

// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles.
// Since the PM requires the full superslice we need both EXECE, EXECO pipelines
// as well as 3 dispatches for the PM. The Load requires the remaining 2
// dispatches.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LXVDSX,
    LXVWSX,
    LXVW4X
)>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    STFS,
    STFD,
    STFIWX,
    STFSX,
    STFDX,
    STXSDX,
    STXSSPX,
    STXSIWX,
    DFSTOREf32,
    DFSTOREf64,
    XFSTOREf32,
    XFSTOREf64,
    STIWX
)>;

// Store operation that requires the whole superslice: both exec pipelines
// (EXECE, EXECO), the AGEN pipeline and all three dispatches.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    STXVD2X,
    STXVW4X
)>;


// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVW,
    DIVWU,
    MODSW
)>;

// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVWE,
    DIVD,
    DIVWEU,
    DIVDU,
    MODSD,
    MODUD,
    MODUW
)>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVDE,
    DIVDEU
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV
// per superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVDo,
    DIVDUo,
    DIVWEo,
    DIVWEUo
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV
// per superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVDEo,
    DIVDEUo
)>;

// CR access instructions in IIC_BrMCR, IIC_BrMCRX.

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    MTOCRF,
    MTOCRF8,
    MTCRF,
    MTCRF8
)>;

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
// Each of the two restricted ALU ops takes all 3 dispatches of its superslice,
// so the resource list carries 6 DISP_1C entries.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    MCRFS
)>;

// FP Div instructions in IIC_FPDivD and IIC_FPDivS.

// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FDIV
)>;

// 33 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
// Lists two EXEC pipelines (one per piece) and 5 dispatches.
def : InstRW<[P9_DPOpAndALUOp_35C_8, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FDIVo
)>;

// 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    XSDIVDP
)>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FDIVS
)>;

// 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
// Lists two EXEC pipelines (one per piece) and 5 dispatches.
def : InstRW<[P9_DPOpAndALUOp_24C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FDIVSo
)>;

// 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    XSDIVSP
)>;

// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XVDIVSP
)>;

// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XVDIVDP
)>;

// Load instructions in IIC_LdStLFDU and IIC_LdStLFDUX.

// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 6 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFSU,
    LFSUX
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted. 3 dispatches are from the restricted load while the other two
// are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
// is required for the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFDU,
    LFDUX
)>;

// Crypto Instructions

// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VPMSUMB,
    VPMSUMD,
    VPMSUMH,
    VPMSUMW,
    VCIPHER,
    VCIPHERLAST,
    VNCIPHER,
    VNCIPHERLAST,
    VSBOX
)>;