blob: f7310b54448a2b074758fc6aa8b1b354d7fd670d [file] [log] [blame]
//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines resources required by some P9 instructions. This is part
// of the P9 processor model used for instruction scheduling. Not every
// instruction is listed here. Instructions in this file belong to itinerary
// classes that have instructions with different resource requirements.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//     for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//
32
// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VADDCUW,
    VADDUBM,
    VADDUDM,
    VADDUHM,
    VADDUWM,
    VAND,
    VANDC,
    VCMPEQUB,
    VCMPEQUD,
    VCMPEQUH,
    VCMPEQUW,
    VCMPNEB,
    VCMPNEH,
    VCMPNEW,
    VCMPNEZB,
    VCMPNEZH,
    VCMPNEZW,
    VEQV,
    VEXTSB2D,
    VEXTSB2W,
    VEXTSH2D,
    VEXTSH2W,
    VEXTSW2D,
    VRLB,
    VRLD,
    VRLDMI,
    VRLDNM,
    VRLH,
    VRLW,
    VRLWMI,
    VRLWNM,
    VSRAB,
    VSRAD,
    VSRAH,
    VSRAW,
    VSRB,
    VSRD,
    VSRH,
    VSRW,
    VSLB,
    VSLD,
    VSLH,
    VSLW,
    VMRGEW,
    VMRGOW,
    VNAND,
    VNEGD,
    VNEGW,
    VNOR,
    VOR,
    VORC,
    VPOPCNTB,
    VPOPCNTH,
    VSEL,
    VSUBUBM,
    VSUBUDM,
    VSUBUHM,
    VSUBUWM,
    VXOR,
    V_SET0B,
    V_SET0H,
    V_SET0,
    XVABSDP,
    XVABSSP,
    XVCPSGNDP,
    XVCPSGNSP,
    XVIEXPDP,
    XVNABSDP,
    XVNABSSP,
    XVNEGDP,
    XVNEGSP,
    XVXEXPDP,
    XVIEXPSP,
    XVXEXPSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
    XXLNAND,
    XXLNOR,
    XXLOR,
    XXLORf,
    XXLORC,
    XXLXOR,
    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
    XSXEXPQP
)>;
130
// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
// single slice. However, since it is Restricted it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FCMPUS,
    FCMPUD,
    XSTSTDCDP,
    XSTSTDCSP
)>;
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000141
// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    XSMAXCDP,
    XSMAXDP,
    XSMAXJDP,
    XSMINCDP,
    XSMINDP,
    XSMINJDP,
    XSTDIVDP,
    XSTSQRTDP,
    XSCMPEQDP,
    XSCMPEXPDP,
    XSCMPGEDP,
    XSCMPGTDP,
    XSCMPODP,
    XSCMPUDP,
    XSXSIGDP,
    XSCVSPDPN
)>;
162
// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    ADDIStocHA,
    ADDItocL,
    MCRF,
    MCRXRX,
    SLD,
    SRD,
    SRAD,
    SRADI,
    RLDIC,
    XSNABSDP,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP
)>;
181
// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    RLDCL,
    RLDCR,
    RLDIMI,
    RLDICL,
    RLDICR,
    RLDICL_32_64,
    XSIEXPDP,
    FMR,
    FABSD,
    FABSS,
    FNABSD,
    FNABSS,
    FNEGD,
    FNEGS,
    FCPSGND,
    FCPSGNS
)>;
204
// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VBPERMD,
    VABSDUB,
    VABSDUH,
    VABSDUW,
    VADDUBS,
    VADDUHS,
    VADDUWS,
    VAVGSB,
    VAVGSH,
    VAVGSW,
    VAVGUB,
    VAVGUH,
    VAVGUW,
    VCMPEQFP,
    VCMPEQFPo,
    VCMPGEFP,
    VCMPGEFPo,
    VCMPBFP,
    VCMPBFPo,
    VCMPGTFP,
    VCMPGTFPo,
    VCLZB,
    VCLZD,
    VCLZH,
    VCLZW,
    VCTZB,
    VCTZD,
    VCTZH,
    VCTZW,
    VADDSBS,
    VADDSHS,
    VADDSWS,
    VMINFP,
    VMINSB,
    VMINSD,
    VMINSH,
    VMINSW,
    VMINUB,
    VMINUD,
    VMINUH,
    VMINUW,
    VMAXFP,
    VMAXSB,
    VMAXSD,
    VMAXSH,
    VMAXSW,
    VMAXUB,
    VMAXUD,
    VMAXUH,
    VMAXUW,
    VPOPCNTW,
    VPOPCNTD,
    VPRTYBD,
    VPRTYBW,
    VSHASIGMAD,
    VSHASIGMAW,
    VSUBSBS,
    VSUBSHS,
    VSUBSWS,
    VSUBUBS,
    VSUBUHS,
    VSUBUWS,
    VSUBCUW,
    VCMPGTSB,
    VCMPGTSBo,
    VCMPGTSD,
    VCMPGTSDo,
    VCMPGTSH,
    VCMPGTSHo,
    VCMPGTSW,
    VCMPGTSWo,
    VCMPGTUB,
    VCMPGTUBo,
    VCMPGTUD,
    VCMPGTUDo,
    VCMPGTUH,
    VCMPGTUHo,
    VCMPGTUW,
    VCMPGTUWo,
    VCMPNEBo,
    VCMPNEHo,
    VCMPNEWo,
    VCMPNEZBo,
    VCMPNEZHo,
    VCMPNEZWo,
    VCMPEQUBo,
    VCMPEQUDo,
    VCMPEQUHo,
    VCMPEQUWo,
    XVCMPEQDP,
    XVCMPEQDPo,
    XVCMPEQSP,
    XVCMPEQSPo,
    XVCMPGEDP,
    XVCMPGEDPo,
    XVCMPGESP,
    XVCMPGESPo,
    XVCMPGTDP,
    XVCMPGTDPo,
    XVCMPGTSP,
    XVCMPGTSPo,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
    XVMINSP,
    XVTDIVDP,
    XVTDIVSP,
    XVTSQRTDP,
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
    XVXSIGDP,
    XVXSIGSP
)>;
325
// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines
// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VADDFP,
    VCTSXS,
    VCTSXS_0,
    VCTUXS,
    VCTUXS_0,
    VEXPTEFP,
    VLOGEFP,
    VMADDFP,
    VMHADDSHS,
    VNMSUBFP,
    VREFP,
    VRFIM,
    VRFIN,
    VRFIP,
    VRFIZ,
    VRSQRTEFP,
    VSUBFP,
    XVADDDP,
    XVADDSP,
    XVCVDPSP,
    XVCVDPSXDS,
    XVCVDPSXWS,
    XVCVDPUXDS,
    XVCVDPUXWS,
    XVCVHPSP,
    XVCVSPDP,
    XVCVSPHP,
    XVCVSPSXDS,
    XVCVSPSXWS,
    XVCVSPUXDS,
    XVCVSPUXWS,
    XVCVSXDDP,
    XVCVSXDSP,
    XVCVSXWDP,
    XVCVSXWSP,
    XVCVUXDDP,
    XVCVUXDSP,
    XVCVUXWDP,
    XVCVUXWSP,
    XVMADDADP,
    XVMADDASP,
    XVMADDMDP,
    XVMADDMSP,
    XVMSUBADP,
    XVMSUBASP,
    XVMSUBMDP,
    XVMSUBMSP,
    XVMULDP,
    XVMULSP,
    XVNMADDADP,
    XVNMADDASP,
    XVNMADDMDP,
    XVNMADDMSP,
    XVNMSUBADP,
    XVNMSUBASP,
    XVNMSUBMDP,
    XVNMSUBMSP,
    XVRDPI,
    XVRDPIC,
    XVRDPIM,
    XVRDPIP,
    XVRDPIZ,
    XVREDP,
    XVRESP,
    XVRSPI,
    XVRSPIC,
    XVRSPIM,
    XVRSPIP,
    XVRSPIZ,
    XVRSQRTEDP,
    XVRSQRTESP,
    XVSUBDP,
    XVSUBSP,
    VCFSX,
    VCFSX_0,
    VCFUX,
    VCFUX_0,
    VMHRADDSHS,
    VMLADDUHM,
    VMSUMMBM,
    VMSUMSHM,
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
    VMULESW,
    VMULEUB,
    VMULEUH,
    VMULEUW,
    VMULOSB,
    VMULOSH,
    VMULOSW,
    VMULOUB,
    VMULOUH,
    VMULOUW,
    VMULUWM,
    VSUM2SWS,
    VSUM4SBS,
    VSUM4SHS,
    VSUM4UBS,
    VSUMSWS
)>;
436
// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FRSP,
    FRIND,
    FRINS,
    FRIPD,
    FRIPS,
    FRIZD,
    FRIZS,
    FRIMD,
    FRIMS,
    FRE,
    FRES,
    FRSQRTE,
    FRSQRTES,
    FMADDS,
    FMADD,
    FMSUBS,
    FMSUB,
    FNMADDS,
    FNMADD,
    FNMSUBS,
    FNMSUB,
    FSELD,
    FSELS,
    FADDS,
    FMULS,
    FMUL,
    FSUBS,
    FCFID,
    FCTID,
    FCTIDZ,
    FCFIDU,
    FCFIDS,
    FCFIDUS,
    FCTIDUZ,
    FCTIWUZ,
    FCTIW,
    FCTIWZ,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
    XSMADDMSP,
    XSMSUBADP,
    XSMSUBASP,
    XSMSUBMDP,
    XSMSUBMSP,
    XSMULDP,
    XSMULSP,
    XSNMADDADP,
    XSNMADDASP,
    XSNMADDMDP,
    XSNMADDMSP,
    XSNMSUBADP,
    XSNMSUBASP,
    XSNMSUBMDP,
    XSNMSUBMSP
)>;
497
// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    XSADDDP,
    XSADDSP,
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVHPDP,
    XSCVSPDP,
    XSCVSXDDP,
    XSCVSXDSP,
    XSCVUXDDP,
    XSCVUXDSP,
    XSRDPI,
    XSRDPIC,
    XSRDPIM,
    XSRDPIP,
    XSRDPIZ,
    XSREDP,
    XSRESP,
    //XSRSP,
    XSRSQRTEDP,
    XSRSQRTESP,
    XSSUBDP,
    XSSUBSP,
    XSCVDPSPN
)>;
531
// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VBPERMQ,
    VCLZLSBB,
    VCTZLSBB,
    VEXTRACTD,
    VEXTRACTUB,
    VEXTRACTUH,
    VEXTRACTUW,
    VEXTUBLX,
    VEXTUBRX,
    VEXTUHLX,
    VEXTUHRX,
    VEXTUWLX,
    VEXTUWRX,
    VGBBD,
    VINSERTB,
    VINSERTD,
    VINSERTH,
    VINSERTW,
    VMRGHB,
    VMRGHH,
    VMRGHW,
    VMRGLB,
    VMRGLH,
    VMRGLW,
    VPERM,
    VPERMR,
    VPERMXOR,
    VPKPX,
    VPKSDSS,
    VPKSDUS,
    VPKSHSS,
    VPKSHUS,
    VPKSWSS,
    VPKSWUS,
    VPKUDUM,
    VPKUDUS,
    VPKUHUM,
    VPKUHUS,
    VPKUWUM,
    VPKUWUS,
    VPRTYBQ,
    VSL,
    VSLDOI,
    VSLO,
    VSLV,
    VSPLTB,
    VSPLTBs,
    VSPLTH,
    VSPLTHs,
    VSPLTISB,
    VSPLTISH,
    VSPLTISW,
    VSPLTW,
    VSR,
    VSRO,
    VSRV,
    VUPKHPX,
    VUPKHSB,
    VUPKHSH,
    VUPKHSW,
    VUPKLPX,
    VUPKLSB,
    VUPKLSH,
    VUPKLSW,
    XXBRD,
    XXBRH,
    XXBRQ,
    XXBRW,
    XXEXTRACTUW,
    XXINSERTW,
    XXMRGHW,
    XXMRGLW,
    XXPERM,
    XXPERMR,
    XXSLDWI,
    XXSPLTIB,
    XXSPLTW,
    XXSPLTWs,
    XXPERMDI,
    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
    VADDUQM,
    VMUL10CUQ,
    VMUL10ECUQ,
    VMUL10EUQ,
    VMUL10UQ,
    VSUBCUQ,
    VSUBECUQ,
    VSUBEUQM,
    VSUBUQM,
    XSCMPEXPQP,
    XSCMPOQP,
    XSCMPUQP,
    XSTSTDCQP,
    XSXSIGQP
)>;
635
// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSADDQP,
    XSADDQPO,
    XSCVDPQP,
    XSCVQPDP,
    XSCVQPDPO,
    XSCVQPSDZ,
    XSCVQPSWZ,
    XSCVQPUDZ,
    XSCVQPUWZ,
    XSCVSDQP,
    XSCVUDQP,
    XSRQPI,
    XSRQPXP,
    XSSUBQP,
    XSSUBQPO
)>;
657
// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSMADDQP,
    XSMADDQPO,
    XSMSUBQP,
    XSMSUBQPO,
    XSMULQP,
    XSMULQPO,
    XSNMADDQP,
    XSNMADDQPO,
    XSNMSUBQP,
    XSNMSUBQPO
)>;
674
// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSDIVQP,
    XSDIVQPO
)>;
683
// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XSSQRTQP,
    XSSQRTQPO
)>;
692
// 5 Cycle load uses a single slice.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
      (instrs
    LXSDX,
    LXVD2X,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64
)>;
704
// 4 Cycle load uses a single slice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
      (instrs
    COPY
)>;
710
// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
// superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFIWZX,
    LFDX,
    LFD
)>;
719
// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFIWAX,
    LFSX,
    LFS
)>;
731
// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 4 dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LXSSPX,
    LXSIWAX,
    LXSSP,
    DFLOADf32
)>;
744
// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles.
// Since the PM requires the full superslice we need both EXECE, EXECO pipelines
// as well as 3 dispatches for the PM. The Load requires the remaining 2
// dispatches.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LXVDSX,
    LXVWSX,
    LXVW4X
)>;
758
// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    STFS,
    STFD,
    STFIWX,
    STFSX,
    STFDX,
    STXSDX,
    STXSSPX,
    STXSIWX,
    DFSTOREf32,
    DFSTOREf64
)>;
774
// Store operation that requires the whole superslice.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    STXVD2X,
    STXVW4X
)>;
782
783
// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVW,
    DIVWU,
    MODSW
)>;
794
// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVWE,
    DIVD,
    DIVWEU,
    DIVDU,
    MODSD,
    MODUD,
    MODUW
)>;
809
// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVDE,
    DIVDEU
)>;
819
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV
// per superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVWEo,
    DIVWEUo
)>;
829
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV
// per superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    DIVDEo,
    DIVDEUo
)>;
839
// CR access instructions in IIC_BrMCR, IIC_BrMCRX.
841
// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    MTOCRF,
    MTOCRF8,
    MTCRF,
    MTCRF8
)>;
855
// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 4 dispatches.
// ALU ops are 3 cycles each.
// NOTE(review): this comment previously read "Cracked, restricted" with
// 6 dispatches, but only four DISP_1C entries are listed below. The comment
// was aligned to the code; confirm that MCRFS is not a restricted op.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    MCRFS
)>;
866
// FP Div instructions in IIC_FPDivD and IIC_FPDivS.
868
// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FDIV
)>;
874
// 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    XSDIVDP
)>;
880
// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    FDIVS
)>;
886
// 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    XSDIVSP
)>;
892
// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XVDIVSP
)>;
901
// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
      (instrs
    XVDIVDP
)>;
910
// Load instructions in IIC_LdStLFDU and IIC_LdStLFDUX.
912
// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 6 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFSU,
    LFSUX
)>;
927
// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted. 3 dispatches are from the restricted load while the other two
// are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
// is required for the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    LFDU,
    LFDUX
)>;
939
// Crypto Instructions
941
// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    VPMSUMB,
    VPMSUMD,
    VPMSUMH,
    VPMSUMW,
    VCIPHER,
    VCIPHERLAST,
    VNCIPHER,
    VNCIPHERLAST,
    VSBOX
)>;