blob: 9a6f4b5dd42980621d3906b555a7e0047dfbbd81 [file] [log] [blame]
Ehsan Amiri6c17bb02016-12-19 13:35:45 +00001//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines resources required by some P9 instructions. This is part
11// of the P9 processor model used for instruction scheduling. Not every instruction
12// is listed here. Instructions in this file belong to itinerary classes that
13// have instructions with different resource requirements.
14//
Stefan Pintilie590eb272017-09-22 20:17:25 +000015// The makeup of the P9 CPU is modeled as follows:
16// - Each CPU is made up of two superslices.
17// - Each superslice is made up of two slices. Therefore, there are 4 slices
18// for each CPU.
19// - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
20// - Each CPU has:
21// - One CY (Crypto) unit P9_CY_*
22// - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
23// - Two PM (Permute) units. One on each superslice. P9_PM_*
24// - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
25// - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
26// - Four DP (Floating Point) units. One on each slice. P9_DP_*
27// This also includes fixed point multiply add.
28// - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
29// - Four Load/Store Queues. P9_LS_*
30// - Each set of instructions will require a number of these resources.
Ehsan Amiri6c17bb02016-12-19 13:35:45 +000031//===----------------------------------------------------------------------===//
32
Stefan Pintilie590eb272017-09-22 20:17:25 +000033// Two cycle ALU vector operation that uses an entire superslice.
34// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
35// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
Ehsan Amiri6c17bb02016-12-19 13:35:45 +000036def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
Stefan Pintilie590eb272017-09-22 20:17:25 +000037 DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +000038 (instrs
Stefan Pintilie626b6512018-02-23 20:37:10 +000039 (instregex "VADDU(B|H|W|D)M$"),
Stefan Pintiliee894e0f2018-03-01 16:16:08 +000040 (instregex "VAND(C)?$"),
Stefan Pintilieb5a94402018-03-02 14:41:38 +000041 (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
Ehsan Amiri6c17bb02016-12-19 13:35:45 +000042 VEQV,
Stefan Pintilie590eb272017-09-22 20:17:25 +000043 VRLB,
44 VRLD,
45 VRLDMI,
46 VRLDNM,
47 VRLH,
48 VRLW,
49 VRLWMI,
50 VRLWNM,
51 VSRAB,
52 VSRAD,
53 VSRAH,
54 VSRAW,
55 VSRB,
56 VSRD,
57 VSRH,
58 VSRW,
59 VSLB,
60 VSLD,
61 VSLH,
62 VSLW,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +000063 VMRGEW,
64 VMRGOW,
65 VNAND,
66 VNEGD,
67 VNEGW,
68 VNOR,
69 VOR,
70 VORC,
71 VPOPCNTB,
72 VPOPCNTH,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +000073 VSEL,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +000074 VSUBUBM,
75 VSUBUDM,
76 VSUBUHM,
77 VSUBUWM,
78 VXOR,
79 V_SET0B,
80 V_SET0H,
81 V_SET0,
82 XVABSDP,
83 XVABSSP,
84 XVCPSGNDP,
85 XVCPSGNSP,
86 XVIEXPDP,
87 XVNABSDP,
88 XVNABSSP,
89 XVNEGDP,
90 XVNEGSP,
91 XVXEXPDP,
Stefan Pintilie590eb272017-09-22 20:17:25 +000092 XVIEXPSP,
93 XVXEXPSP,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +000094 XXLAND,
95 XXLANDC,
96 XXLEQV,
97 XXLNAND,
98 XXLNOR,
99 XXLOR,
100 XXLORf,
101 XXLORC,
102 XXLXOR,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000103 XXSEL,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000104 XSABSQP,
105 XSCPSGNQP,
106 XSIEXPQP,
107 XSNABSQP,
108 XSNEGQP,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000109 XSXEXPQP
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000110)>;
111
Stefan Pintilie590eb272017-09-22 20:17:25 +0000112// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
113// single slice. However, since it is Restricted it requires all 3 dispatches
114// (DISP) for that superslice.
115def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000116 (instrs
Stefan Pintilie590eb272017-09-22 20:17:25 +0000117 FCMPUS,
118 FCMPUD,
119 XSTSTDCDP,
Stefan Pintilie626b6512018-02-23 20:37:10 +0000120 XSTSTDCSP,
121 FTDIV,
122 FTSQRT,
Stefan Pintilied45db612018-03-05 14:34:59 +0000123 CMPEQB,
Stefan Pintilie626b6512018-02-23 20:37:10 +0000124 (instregex "CMPRB(8)?$"),
125 (instregex "TD(I)?$"),
126 (instregex "TW(I)?$")
Stefan Pintilie590eb272017-09-22 20:17:25 +0000127)>;
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000128
Stefan Pintilie590eb272017-09-22 20:17:25 +0000129// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
130def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
131 (instrs
Stefan Pintilie626b6512018-02-23 20:37:10 +0000132 (instregex "XSMAX(C|J)?DP$"),
133 (instregex "XSMIN(C|J)?DP$"),
134 (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000135 XSTDIVDP,
136 XSTSQRTDP,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000137 XSXSIGDP,
Stefan Pintilie626b6512018-02-23 20:37:10 +0000138 XSCVSPDPN,
139 SETB,
140 BPERMD,
Stefan Pintilied45db612018-03-05 14:34:59 +0000141 (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
Stefan Pintilie626b6512018-02-23 20:37:10 +0000142 (instregex "POPCNT(D|W)$"),
143 (instregex "CMPB(8)?$")
Stefan Pintilie590eb272017-09-22 20:17:25 +0000144)>;
145
146// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
// Uses one ALU unit (P9_ALU_2C), one EXEC pipeline and two dispatches (DISP).
147def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
148 (instrs
149 ADDIStocHA,
150 ADDItocL,
151 MCRF,
152 MCRXRX,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000153 XSNABSDP,
154 XSXEXPDP,
155 XSABSDP,
156 XSNEGDP,
Stefan Pintilie626b6512018-02-23 20:37:10 +0000157 XSCPSGNDP,
158 (instregex "S(L|R)D$"),
159 (instregex "SRAD(I)?$"),
160 (instregex "EXTSWSLI$"),
161 SRADI_32,
162 RLDIC,
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000163 RFEBB,
Stefan Pintilie626b6512018-02-23 20:37:10 +0000164 LA,
165 (instregex "CMP(WI|LWI|W|LW)(8)?$"),
Stefan Pintilied45db612018-03-05 14:34:59 +0000166 (instregex "CMP(L)?D(I)?$"),
Stefan Pintilie626b6512018-02-23 20:37:10 +0000167 (instregex "SUBF(I)?C(8)?$"),
168 (instregex "ANDI(S)?o(8)?$"),
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000169 (instregex "ADDC(8)?$"),
170 (instregex "ADDIC(8)?(o)?$"),
171 (instregex "ADD(8|4)(o)?$"),
172 (instregex "ADD(E|ME|ZE)(8)?(o)?$"),
Stefan Pintilie626b6512018-02-23 20:37:10 +0000173 (instregex "SUBF(E|ME|ZE)?(8)?$"),
174 (instregex "NEG(8)?$"),
175 (instregex "POPCNTB$"),
176 (instregex "ADD(I|IS)?(8)?$"),
177 (instregex "LI(S)?(8)?$"),
178 (instregex "(X)?OR(I|IS)?(8)?$"),
179 NOP,
180 (instregex "NAND(8)?$"),
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000181 (instregex "AND(C)?(8)?(o)?$"),
Stefan Pintilie626b6512018-02-23 20:37:10 +0000182 (instregex "NOR(8)?$"),
183 (instregex "OR(C)?(8)?$"),
Stefan Pintilied45db612018-03-05 14:34:59 +0000184 (instregex "EQV(8)?(o)?$"),
Stefan Pintilieb5a94402018-03-02 14:41:38 +0000185 (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
Stefan Pintilie626b6512018-02-23 20:37:10 +0000186 (instregex "ADD(4|8)(TLS)?(_)?$")
Stefan Pintilie590eb272017-09-22 20:17:25 +0000188)>;
189
190// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
191// single slice. However, since it is Restricted it requires all 3 dispatches
192// (DISP) for that superslice.
193def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
194 (instrs
195 RLDCL,
196 RLDCR,
197 RLDIMI,
198 RLDICL,
199 RLDICR,
200 RLDICL_32_64,
Stefan Pintilie626b6512018-02-23 20:37:10 +0000201 RLDICL_32,
202 RLDICR_32,
203 (instregex "RLWIMI(8)?$"),
Stefan Pintilie590eb272017-09-22 20:17:25 +0000204 XSIEXPDP,
205 FMR,
Stefan Pintilied45db612018-03-05 14:34:59 +0000206 CREQV,
207 CRXOR,
208 (instregex "CR(6)?(UN)?SET$"),
209 (instregex "CR(N)?(OR|AND)(C)?$"),
Stefan Pintilie626b6512018-02-23 20:37:10 +0000210 (instregex "S(L|R)W(8)?$"),
211 (instregex "RLW(INM|NM)(8)?$"),
212 (instregex "F(N)?ABS(D|S)$"),
213 (instregex "FNEG(D|S)$"),
214 (instregex "FCPSGN(D|S)$"),
215 (instregex "SRAW(I)?$"),
216 (instregex "ISEL(8)?$")
Stefan Pintilie590eb272017-09-22 20:17:25 +0000217)>;
218
219// Three cycle ALU vector operation that uses an entire superslice.
220// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
221// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
222def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
223 DISP_1C, DISP_1C, DISP_1C],
224 (instrs
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000225 (instregex "VCMPNEZ(B|H|W)$"),
226 VCMPEQUB,
227 VCMPEQUD,
228 VCMPEQUH,
229 VCMPEQUW,
230 VCMPNEB,
231 VCMPNEH,
232 VCMPNEW,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000233 VBPERMD,
234 VABSDUB,
235 VABSDUH,
236 VABSDUW,
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000237 VADDCUW,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000238 VADDUBS,
239 VADDUHS,
240 VADDUWS,
241 VAVGSB,
242 VAVGSH,
243 VAVGSW,
244 VAVGUB,
245 VAVGUH,
246 VAVGUW,
247 VCMPEQFP,
248 VCMPEQFPo,
249 VCMPGEFP,
250 VCMPGEFPo,
251 VCMPBFP,
252 VCMPBFPo,
253 VCMPGTFP,
254 VCMPGTFPo,
255 VCLZB,
256 VCLZD,
257 VCLZH,
258 VCLZW,
259 VCTZB,
260 VCTZD,
261 VCTZH,
262 VCTZW,
263 VADDSBS,
264 VADDSHS,
265 VADDSWS,
266 VMINFP,
267 VMINSB,
268 VMINSD,
269 VMINSH,
270 VMINSW,
271 VMINUB,
272 VMINUD,
273 VMINUH,
274 VMINUW,
275 VMAXFP,
276 VMAXSB,
277 VMAXSD,
278 VMAXSH,
279 VMAXSW,
280 VMAXUB,
281 VMAXUD,
282 VMAXUH,
283 VMAXUW,
284 VPOPCNTW,
285 VPOPCNTD,
286 VPRTYBD,
287 VPRTYBW,
288 VSHASIGMAD,
289 VSHASIGMAW,
290 VSUBSBS,
291 VSUBSHS,
292 VSUBSWS,
293 VSUBUBS,
294 VSUBUHS,
295 VSUBUWS,
296 VSUBCUW,
297 VCMPGTSB,
298 VCMPGTSBo,
299 VCMPGTSD,
300 VCMPGTSDo,
301 VCMPGTSH,
302 VCMPGTSHo,
303 VCMPGTSW,
304 VCMPGTSWo,
305 VCMPGTUB,
306 VCMPGTUBo,
307 VCMPGTUD,
308 VCMPGTUDo,
309 VCMPGTUH,
310 VCMPGTUHo,
311 VCMPGTUW,
312 VCMPGTUWo,
313 VCMPNEBo,
314 VCMPNEHo,
315 VCMPNEWo,
316 VCMPNEZBo,
317 VCMPNEZHo,
318 VCMPNEZWo,
319 VCMPEQUBo,
320 VCMPEQUDo,
321 VCMPEQUHo,
322 VCMPEQUWo,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000323 XVCMPEQDP,
324 XVCMPEQDPo,
325 XVCMPEQSP,
326 XVCMPEQSPo,
327 XVCMPGEDP,
328 XVCMPGEDPo,
329 XVCMPGESP,
330 XVCMPGESPo,
331 XVCMPGTDP,
332 XVCMPGTDPo,
333 XVCMPGTSP,
334 XVCMPGTSPo,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000335 XVMAXDP,
336 XVMAXSP,
337 XVMINDP,
338 XVMINSP,
339 XVTDIVDP,
340 XVTDIVSP,
341 XVTSQRTDP,
342 XVTSQRTSP,
343 XVTSTDCDP,
344 XVTSTDCSP,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000345 XVXSIGDP,
346 XVXSIGSP
347)>;
348
Stefan Pintilie590eb272017-09-22 20:17:25 +0000349// 7 cycle DP vector operation that uses an entire superslice.
350// Uses both DP units (the even DPE and odd DPO units), two pipelines
351// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
352def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
353 DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000354 (instrs
355 VADDFP,
356 VCTSXS,
357 VCTSXS_0,
358 VCTUXS,
359 VCTUXS_0,
360 VEXPTEFP,
361 VLOGEFP,
362 VMADDFP,
363 VMHADDSHS,
364 VNMSUBFP,
365 VREFP,
366 VRFIM,
367 VRFIN,
368 VRFIP,
369 VRFIZ,
370 VRSQRTEFP,
371 VSUBFP,
372 XVADDDP,
373 XVADDSP,
374 XVCVDPSP,
375 XVCVDPSXDS,
376 XVCVDPSXWS,
377 XVCVDPUXDS,
378 XVCVDPUXWS,
379 XVCVHPSP,
380 XVCVSPDP,
381 XVCVSPHP,
382 XVCVSPSXDS,
383 XVCVSPSXWS,
384 XVCVSPUXDS,
385 XVCVSPUXWS,
386 XVCVSXDDP,
387 XVCVSXDSP,
388 XVCVSXWDP,
389 XVCVSXWSP,
390 XVCVUXDDP,
391 XVCVUXDSP,
392 XVCVUXWDP,
393 XVCVUXWSP,
394 XVMADDADP,
395 XVMADDASP,
396 XVMADDMDP,
397 XVMADDMSP,
398 XVMSUBADP,
399 XVMSUBASP,
400 XVMSUBMDP,
401 XVMSUBMSP,
402 XVMULDP,
403 XVMULSP,
404 XVNMADDADP,
405 XVNMADDASP,
406 XVNMADDMDP,
407 XVNMADDMSP,
408 XVNMSUBADP,
409 XVNMSUBASP,
410 XVNMSUBMDP,
411 XVNMSUBMSP,
412 XVRDPI,
413 XVRDPIC,
414 XVRDPIM,
415 XVRDPIP,
416 XVRDPIZ,
417 XVREDP,
418 XVRESP,
419 XVRSPI,
420 XVRSPIC,
421 XVRSPIM,
422 XVRSPIP,
423 XVRSPIZ,
424 XVRSQRTEDP,
425 XVRSQRTESP,
426 XVSUBDP,
427 XVSUBSP,
428 VCFSX,
429 VCFSX_0,
430 VCFUX,
431 VCFUX_0,
432 VMHRADDSHS,
433 VMLADDUHM,
434 VMSUMMBM,
435 VMSUMSHM,
436 VMSUMSHS,
437 VMSUMUBM,
438 VMSUMUHM,
439 VMSUMUHS,
440 VMULESB,
441 VMULESH,
442 VMULESW,
443 VMULEUB,
444 VMULEUH,
445 VMULEUW,
446 VMULOSB,
447 VMULOSH,
448 VMULOSW,
449 VMULOUB,
450 VMULOUH,
451 VMULOUW,
452 VMULUWM,
453 VSUM2SWS,
454 VSUM4SBS,
455 VSUM4SHS,
456 VSUM4UBS,
457 VSUMSWS
458)>;
459
Stefan Pintilie590eb272017-09-22 20:17:25 +0000460// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
461// dispatch units for the superslice.
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000462def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
463 (instrs
Stefan Pintilie590eb272017-09-22 20:17:25 +0000464 FRSP,
Stefan Pintilied45db612018-03-05 14:34:59 +0000465 (instregex "FRI(N|P|Z|M)(D|S)$"),
466 (instregex "FRE(S)?$"),
467 (instregex "FADD(S)?$"),
468 (instregex "FMSUB(S)?$"),
469 (instregex "FMADD(S)?$"),
Stefan Pintilie590eb272017-09-22 20:17:25 +0000470 FRSQRTE,
471 FRSQRTES,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000472 FNMADDS,
473 FNMADD,
474 FNMSUBS,
475 FNMSUB,
476 FSELD,
477 FSELS,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000478 FMULS,
479 FMUL,
Stefan Pintilied45db612018-03-05 14:34:59 +0000480 (instregex "FSUB(S)?$"),
481 (instregex "FCFID(U)?(S)?$"),
482 (instregex "FCTID(U)?(Z)?$"),
483 (instregex "FCTIW(U)?(Z)?$"),
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000484 XSMADDADP,
485 XSMADDASP,
486 XSMADDMDP,
487 XSMADDMSP,
488 XSMSUBADP,
489 XSMSUBASP,
490 XSMSUBMDP,
491 XSMSUBMSP,
492 XSMULDP,
493 XSMULSP,
494 XSNMADDADP,
495 XSNMADDASP,
496 XSNMADDMDP,
497 XSNMADDMSP,
498 XSNMSUBADP,
499 XSNMSUBASP,
500 XSNMSUBMDP,
501 XSNMSUBMSP
502)>;
503
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000504// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
Stefan Pintilied45db612018-03-05 14:34:59 +0000505// These operations can be done in parallel.
506// The DP is restricted so we need a full 5 dispatches.
507def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
508 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
509 (instrs
510 (instregex "FSEL(D|S)o$")
511)>;
512
513// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
514// These operations must be done sequentially.
Stefan Pintiliecc330da2017-10-10 13:45:35 +0000515// The DP is restricted so we need a full 5 dispatches.
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000516def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
Stefan Pintiliecc330da2017-10-10 13:45:35 +0000517 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
518 (instrs
Stefan Pintilied45db612018-03-05 14:34:59 +0000519 (instregex "FRI(N|P|Z|M)(D|S)o$"),
520 (instregex "FRE(S)?o$"),
521 (instregex "FADD(S)?o$"),
522 (instregex "FSUB(S)?o$"),
523 (instregex "F(N)?MSUB(S)?o$"),
524 (instregex "F(N)?MADD(S)?o$"),
525 (instregex "FCFID(U)?(S)?o$"),
526 (instregex "FCTID(U)?(Z)?o$"),
527 (instregex "FCTIW(U)?(Z)?o$"),
528 (instregex "FMUL(S)?o$"),
529 (instregex "FRSQRTE(S)?o$"),
530 FRSPo
Stefan Pintiliecc330da2017-10-10 13:45:35 +0000531)>;
532
Stefan Pintilie590eb272017-09-22 20:17:25 +0000533// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000534def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
535 (instrs
536 XSADDDP,
537 XSADDSP,
538 XSCVDPHP,
539 XSCVDPSP,
540 XSCVDPSXDS,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000541 XSCVDPSXDSs,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000542 XSCVDPSXWS,
543 XSCVDPUXDS,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000544 XSCVDPUXDSs,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000545 XSCVDPUXWS,
546 XSCVHPDP,
547 XSCVSPDP,
548 XSCVSXDDP,
549 XSCVSXDSP,
550 XSCVUXDDP,
551 XSCVUXDSP,
552 XSRDPI,
553 XSRDPIC,
554 XSRDPIM,
555 XSRDPIP,
556 XSRDPIZ,
557 XSREDP,
558 XSRESP,
559 //XSRSP,
560 XSRSQRTEDP,
561 XSRSQRTESP,
562 XSSUBDP,
563 XSSUBSP,
564 XSCVDPSPN
565)>;
566
Stefan Pintilie590eb272017-09-22 20:17:25 +0000567// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
568// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
569// dispatches.
570def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000571 (instrs
572 VBPERMQ,
573 VCLZLSBB,
574 VCTZLSBB,
575 VEXTRACTD,
576 VEXTRACTUB,
577 VEXTRACTUH,
578 VEXTRACTUW,
579 VEXTUBLX,
580 VEXTUBRX,
581 VEXTUHLX,
582 VEXTUHRX,
583 VEXTUWLX,
584 VEXTUWRX,
585 VGBBD,
586 VINSERTB,
587 VINSERTD,
588 VINSERTH,
589 VINSERTW,
590 VMRGHB,
591 VMRGHH,
592 VMRGHW,
593 VMRGLB,
594 VMRGLH,
595 VMRGLW,
596 VPERM,
597 VPERMR,
598 VPERMXOR,
599 VPKPX,
600 VPKSDSS,
601 VPKSDUS,
602 VPKSHSS,
603 VPKSHUS,
604 VPKSWSS,
605 VPKSWUS,
606 VPKUDUM,
607 VPKUDUS,
608 VPKUHUM,
609 VPKUHUS,
610 VPKUWUM,
611 VPKUWUS,
612 VPRTYBQ,
613 VSL,
614 VSLDOI,
615 VSLO,
616 VSLV,
617 VSPLTB,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000618 VSPLTBs,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000619 VSPLTH,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000620 VSPLTHs,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000621 VSPLTISB,
622 VSPLTISH,
623 VSPLTISW,
624 VSPLTW,
625 VSR,
626 VSRO,
627 VSRV,
628 VUPKHPX,
629 VUPKHSB,
630 VUPKHSH,
631 VUPKHSW,
632 VUPKLPX,
633 VUPKLSB,
634 VUPKLSH,
635 VUPKLSW,
636 XXBRD,
637 XXBRH,
638 XXBRQ,
639 XXBRW,
640 XXEXTRACTUW,
641 XXINSERTW,
642 XXMRGHW,
643 XXMRGLW,
644 XXPERM,
645 XXPERMR,
646 XXSLDWI,
647 XXSPLTIB,
648 XXSPLTW,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000649 XXSPLTWs,
650 XXPERMDI,
651 XXPERMDIs,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000652 VADDCUQ,
653 VADDECUQ,
654 VADDEUQM,
655 VADDUQM,
656 VMUL10CUQ,
657 VMUL10ECUQ,
658 VMUL10EUQ,
659 VMUL10UQ,
660 VSUBCUQ,
661 VSUBECUQ,
662 VSUBEUQM,
663 VSUBUQM,
664 XSCMPEXPQP,
665 XSCMPOQP,
666 XSCMPUQP,
667 XSTSTDCQP,
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000668 XSXSIGQP,
669 BCDCFNo,
670 BCDCFZo,
671 BCDCPSGNo,
672 BCDCTNo,
673 BCDCTZo,
674 BCDSETSGNo,
675 BCDSo,
676 BCDTRUNCo,
677 BCDUSo,
678 BCDUTRUNCo
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000679)>;
680
Stefan Pintilie590eb272017-09-22 20:17:25 +0000681// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
682// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
683// dispatches.
684def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000685 (instrs
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000686 BCDSRo,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000687 XSADDQP,
688 XSADDQPO,
689 XSCVDPQP,
690 XSCVQPDP,
691 XSCVQPDPO,
692 XSCVQPSDZ,
693 XSCVQPSWZ,
694 XSCVQPUDZ,
695 XSCVQPUWZ,
696 XSCVSDQP,
697 XSCVUDQP,
698 XSRQPI,
699 XSRQPXP,
700 XSSUBQP,
701 XSSUBQPO
702)>;
703
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000704// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
705// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
706// dispatches.
707def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
708 (instrs
709 BCDCTSQo
710)>;
711
Stefan Pintilie590eb272017-09-22 20:17:25 +0000712// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
713// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
714// dispatches.
715def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000716 (instrs
717 XSMADDQP,
718 XSMADDQPO,
719 XSMSUBQP,
720 XSMSUBQPO,
721 XSMULQP,
722 XSMULQPO,
723 XSNMADDQP,
724 XSNMADDQPO,
725 XSNMSUBQP,
726 XSNMSUBQPO
727)>;
728
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000729// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
730// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
731// dispatches.
732def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
733 (instrs
734 BCDCFSQo
735)>;
736
Stefan Pintilie590eb272017-09-22 20:17:25 +0000737// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
738// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
739// dispatches.
740def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000741 (instrs
742 XSDIVQP,
743 XSDIVQPO
744)>;
745
Stefan Pintilie590eb272017-09-22 20:17:25 +0000746// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
747// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
748// dispatches.
749def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000750 (instrs
751 XSSQRTQP,
752 XSSQRTQPO
753)>;
754
Stefan Pintilie590eb272017-09-22 20:17:25 +0000755// 5 Cycle load uses a single slice.
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000756def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
757 (instrs
758 LXSDX,
759 LXVD2X,
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000760 LXVWSX,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000761 LXSIWZX,
762 LXV,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000763 LXVX,
764 LXSD,
Tony Jiang438bf4a2017-11-20 14:38:30 +0000765 DFLOADf64,
Stefan Pintilie626b6512018-02-23 20:37:10 +0000766 XFLOADf64,
767 LIWZX
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000768)>;
769
Stefan Pintilie590eb272017-09-22 20:17:25 +0000770// 4 Cycle load uses a single slice.
771def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
772 (instrs
Stefan Pintilied45db612018-03-05 14:34:59 +0000773 COPY,
774 CP_ABORT,
775 DARN,
776 EnforceIEIO,
777 ISYNC,
778 (instregex "DCB(F|T|ST)(EP)?$"),
779 (instregex "DCBZ(L)?(EP)?$"),
780 (instregex "DCBTST(EP)?$"),
781 (instregex "CP_COPY(8)?$"),
782 (instregex "CP_PASTE(8)?$"),
783 (instregex "ICBI(EP)?$"),
784 (instregex "ICBT(LS)?$"),
785 (instregex "LBARX(L)?$"),
786 (instregex "LBZ(CIX|8|X|X8)?$"),
787 (instregex "LD(ARX|ARXL|BRX|CIX|X)?$")
Stefan Pintilie590eb272017-09-22 20:17:25 +0000788)>;
789
790// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
791// superslice.
792def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000793 (instrs
794 LFIWZX,
795 LFDX,
796 LFD
797)>;
798
Stefan Pintilied45db612018-03-05 14:34:59 +0000799// Cracked Load instruction.
800// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
801// operations cannot be done at the same time and so their latencies are added.
802def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
803 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
804 (instrs
805 (instregex "LHA(8)?$"),
806 (instregex "CP_PASTE(8)?o$")
807)>;
808
Stefan Pintilie590eb272017-09-22 20:17:25 +0000809// Cracked Restricted Load instruction.
810// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
811// operations cannot be done at the same time and so their latencies are added.
812// Full 6 dispatches are required as this is both cracked and restricted.
813def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000814 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
815 (instrs
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000816 LFIWAX
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000817)>;
818
Stefan Pintilie590eb272017-09-22 20:17:25 +0000819// Cracked Load instruction.
820// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
821// operations cannot be done at the same time and so their latencies are added.
822// Full 4 dispatches are required as this is a cracked instruction.
823def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
824 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
825 (instrs
Stefan Pintilie590eb272017-09-22 20:17:25 +0000826 LXSIWAX,
Stefan Pintilie626b6512018-02-23 20:37:10 +0000827 LIWAX
Stefan Pintilie590eb272017-09-22 20:17:25 +0000828)>;
829
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000830// Cracked Load instruction.
831// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
832// cycles. The Load and ALU operations cannot be done at the same time and so
833// their latencies are added.
834// Full 6 dispatches are required as this is a restricted instruction.
835def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
836 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
837 (instrs
838 LFSX,
839 LFS
840)>;
841
842// Cracked Load instruction.
843// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
844// operations cannot be done at the same time and so their latencies are added.
845// Full 4 dispatches are required as this is a cracked instruction.
846def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
847 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
848 (instrs
849 LXSSP,
850 LXSSPX,
851 XFLOADf32,
852 DFLOADf32
853)>;
854
Stefan Pintilie590eb272017-09-22 20:17:25 +0000855// Cracked Load that requires the PM resource.
856// Since the Load and the PM cannot be done at the same time the latencies are
857// added. Requires 8 cycles.
858// Since the PM requires the full superslice we need both EXECE, EXECO pipelines
859// as well as 3 dispatches for the PM. The Load requires the remaining 2
860// dispatches.
861def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
862 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000863 (instrs
864 LXVDSX,
865 LXVW4X
866)>;
867
Stefan Pintilie590eb272017-09-22 20:17:25 +0000868// Single slice Restricted store operation. The restricted operation requires
869// all three dispatches for the superslice.
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000870def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
871 (instrs
Stefan Pintilie626b6512018-02-23 20:37:10 +0000872 (instregex "STF(S|D|IWX|SX|DX)$"),
873 (instregex "STXS(DX|SPX|IWX)$"),
Stefan Pintilie590eb272017-09-22 20:17:25 +0000874 DFSTOREf32,
Tony Jiang438bf4a2017-11-20 14:38:30 +0000875 DFSTOREf64,
876 XFSTOREf32,
877 XFSTOREf64,
878 STIWX
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000879)>;
880
Stefan Pintilie590eb272017-09-22 20:17:25 +0000881// Store operation that requires the whole superslice.
882def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
883 DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000884 (instrs
885 STXVD2X,
886 STXVW4X
887)>;
888
Stefan Pintilie626b6512018-02-23 20:37:10 +0000889// Cracked instruction made up of two restricted stores.
890def : InstRW<[P9_LS_1C, P9_LS_1C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
891 IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
892 (instrs
893 STFDEPX
894)>;
895
896// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
897// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
898// dispatches.
899def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
900 (instrs
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000901 (instregex "M(T|F)VRSAVE(v)?$"),
902 (instregex "MF(SPR|CTR|LR)(8)?$"),
903 MFDCR
Stefan Pintilie626b6512018-02-23 20:37:10 +0000904)>;
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000905
Stefan Pintilie590eb272017-09-22 20:17:25 +0000906// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
907// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
908// dispatches.
909def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
910 DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000911 (instrs
912 DIVW,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000913 DIVWU,
914 MODSW
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000915)>;
916
Stefan Pintilie590eb272017-09-22 20:17:25 +0000917// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
918// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
919// dispatches.
920def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
921 DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000922 (instrs
923 DIVWE,
924 DIVD,
925 DIVWEU,
Stefan Pintilie590eb272017-09-22 20:17:25 +0000926 DIVDU,
927 MODSD,
928 MODUD,
929 MODUW
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000930)>;
931
Stefan Pintilie590eb272017-09-22 20:17:25 +0000932// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
933// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
934// dispatches.
935def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
936 DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000937 (instrs
938 DIVDE,
939 DIVDEU
940)>;
941
Stefan Pintilie590eb272017-09-22 20:17:25 +0000942// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
943// and one full superslice for the DIV operation since there is only one DIV
944// per superslice. Latency of DIV plus ALU is 18.
Stefan Pintilied45db612018-03-05 14:34:59 +0000945def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
946 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
947 (instrs
948 (instregex "DIVW(U)?(O)?o$")
949)>;
950
951// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
952// and one full superslice for the DIV operation since there is only one DIV
953// per superslice. Latency of DIV plus ALU is 26.
Stefan Pintilie590eb272017-09-22 20:17:25 +0000954def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
955 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000956 (instrs
Stefan Pintiliecc330da2017-10-10 13:45:35 +0000957 DIVDo,
958 DIVDUo,
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000959 DIVWEo,
960 DIVWEUo
961)>;
962
Stefan Pintilie590eb272017-09-22 20:17:25 +0000963// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
964// and one full superslice for the DIV operation since there is only one DIV
965// per superslice. Latency of DIV plus ALU is 42.
966def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
967 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000968 (instrs
969 DIVDEo,
970 DIVDEUo
971)>;
972
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000973// CR access instructions in IIC_BrMCR, IIC_BrMCRX.
974
Stefan Pintilie590eb272017-09-22 20:17:25 +0000975// Cracked, restricted, ALU operations.
976// Here the two ALU ops can actually be done in parallel and therefore the
977// latencies are not added together. Otherwise this is like having two
978// instructions running together on two pipelines and 6 dispatches.
979// ALU ops are 2 cycles each.
Ehsan Amiri6c17bb02016-12-19 13:35:45 +0000980def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
981 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
982 (instrs
983 MTOCRF,
984 MTOCRF8,
985 MTCRF,
986 MTCRF8
987)>;
988
Stefan Pintiliee894e0f2018-03-01 16:16:08 +0000989// Cracked ALU operations.
990// Here the two ALU ops can actually be done in parallel and therefore the
991// latencies are not added together. Otherwise this is like having two
992// instructions running together on two pipelines and 4 dispatches.
993// ALU ops are 2 cycles each.
994def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
995 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
996 (instrs
997 (instregex "ADDC(8)?o$")
998)>;
999
Stefan Pintilied45db612018-03-05 14:34:59 +00001000// Cracked ALU operations.
1001// Two ALU ops can be done in parallel.
1002// One is three cycle ALU the other is a two cycle ALU.
1003// One of the ALU ops is restricted the other is not so we have a total of
1004// 5 dispatches.
1005def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1006 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1007 (instrs
1008 (instregex "F(N)?ABS(D|S)o$"),
1009 (instregex "FCPSGN(D|S)o$"),
1010 (instregex "FNEG(D|S)o$"),
1011 FMRo
1012)>;
1013
// Cracked ALU operations.
// The two ALU ops (3 cycles each) execute in parallel, so their latencies are
// not summed. In terms of resources this is the same as two instructions
// issued together: two execution pipelines and 4 dispatches.
def : InstRW<[
    P9_ALU_3C, P9_ALU_3C,
    IP_EXEC_1C, IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs MCRFS)>;
1024
// Cracked instruction consisting of two ALU ops that cannot run in parallel.
// One of the ALU ops is restricted and takes 3 dispatches, for a total of 5.
def : InstRW<[
    P9_ALUOpAndALUOp_4C,
    IP_EXEC_1C, IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs RLWINMo)>;
1033
// Cracked instruction consisting of two ALU ops that cannot run in parallel.
// Uses two execution pipelines and 4 dispatches.
def : InstRW<[
    P9_ALUOpAndALUOp_4C,
    IP_EXEC_1C, IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs (instregex "EXTSWSLIo$"))>;
1041
// FP Div instructions in IIC_FPDivD and IIC_FPDivS.
1043
// Restricted 33 cycle DP instruction: one slice and 3 dispatches.
def : InstRW<[
    P9_DP_33C_8,
    IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs FDIV)>;
1049
// Restricted 33 cycle DP instruction, cracked with a 3 cycle ALU op.
// Uses two execution pipelines and 5 dispatches.
def : InstRW<[
    P9_DPOpAndALU2Op_36C_8,
    IP_EXEC_1C, IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs FDIVo)>;
1056
// Restricted 36 cycle DP instruction: one slice and 3 dispatches.
def : InstRW<[
    P9_DP_36C_10,
    IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs FSQRT)>;
1062
// Restricted 36 cycle DP instruction, cracked with a 3 cycle ALU op.
// Uses two execution pipelines and 5 dispatches.
def : InstRW<[
    P9_DPOpAndALU2Op_39C_10,
    IP_EXEC_1C, IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs FSQRTo)>;
1069
// Restricted 26 cycle DP instruction: one slice and 3 dispatches.
def : InstRW<[
    P9_DP_26C_5,
    IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs FSQRTS)>;
1075
// Restricted 26 cycle DP instruction, cracked with a 3 cycle ALU op.
// Uses two execution pipelines and 5 dispatches.
def : InstRW<[
    P9_DPOpAndALU2Op_29C_5,
    IP_EXEC_1C, IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs FSQRTSo)>;
1082
// 33 cycle DP instruction (not restricted): one slice and 2 dispatches.
def : InstRW<[
    P9_DP_33C_8,
    IP_EXEC_1C,
    DISP_1C, DISP_1C
  ],
  (instrs XSDIVDP)>;
1088
// Restricted 22 cycle DP instruction: one slice and 3 dispatches.
def : InstRW<[
    P9_DP_22C_5,
    IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs FDIVS)>;
1094
// Restricted 22 cycle DP instruction, cracked with a 3 cycle ALU op.
// Uses two execution pipelines and 5 dispatches.
// NOTE(review): this comment previously said "2 Cycle ALU"; the 25C resource
// (22 + 3) and the sibling FDIVo/FSQRTo/FSQRTSo entries imply a 3 cycle ALU
// op -- confirm against the resource definition.
def : InstRW<[
    P9_DPOpAndALU2Op_25C_5,
    IP_EXEC_1C, IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs FDIVSo)>;
1101
// 22 cycle DP instruction (not restricted): one slice and 2 dispatches.
def : InstRW<[
    P9_DP_22C_5,
    IP_EXEC_1C,
    DISP_1C, DISP_1C
  ],
  (instrs XSDIVSP)>;
1107
// 24 cycle DP vector instruction that occupies a full superslice:
// both the EXECE and EXECO pipelines and all 3 dispatches of that superslice.
def : InstRW<[
    P9_DPE_24C_8, P9_DPO_24C_8,
    IP_EXECE_1C, IP_EXECO_1C,
    DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs XVDIVSP)>;
1116
// 33 cycle DP vector instruction that occupies a full superslice:
// both the EXECE and EXECO pipelines and all 3 dispatches of that superslice.
def : InstRW<[
    P9_DPE_33C_8, P9_DPO_33C_8,
    IP_EXECE_1C, IP_EXECO_1C,
    DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs XVDIVDP)>;
1125
// Load instructions in IIC_LdStLFDU and IIC_LdStLFDUX.
1127
// Instruction cracked into three pieces: one Load and two ALU operations.
// The Load and the ALU op that depends on it cannot run at the same time, so
// their latencies are added together; the combined resource used here is
// P9_LoadAndALU2Op_7C. The remaining ALU op is 2 cycles.
// NOTE(review): this comment previously quoted 6 cycles for the combined
// latency while the resource name says 7C -- confirm.
// The load and its dependent ALU op are both restricted, so together they
// take 6 dispatches; the last 2 dispatches belong to the second ALU op.
// The two EXEC pipelines serve the 2 ALU ops and the AGEN serves the load.
def : InstRW<[
    P9_LoadAndALU2Op_7C, P9_ALU_2C,
    IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs (instregex "LF(SU|SUX)$"))>;
1141
// Cracked instruction consisting of a Store and an ALU op. The ALU op has no
// dependency on the store, so both can run at the same time. The store is
// restricted as well.
def : InstRW<[
    P9_LS_1C, P9_ALU_2C,
    IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs (instregex "STF(SU|SUX|DU|DUX)$"))>;
1150
// Cracked instruction consisting of a Load and an ALU op. The ALU op has no
// dependency on the load, so both can run at the same time.
def : InstRW<[
    P9_LS_4C, P9_ALU_2C,
    IP_AGEN_1C, IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs
    (instregex "LBZU(X)?(8)?$"),
    (instregex "LDU(X)?$"))>;
1159
1160
// Cracked instruction consisting of a Load and an ALU op. The ALU op has no
// dependency on the load, so both can run at the same time. The load is
// restricted: it accounts for 3 of the dispatches and the ALU op for the
// other two. The AGEN pipeline is used by the load, the EXEC pipeline by the
// ALU op.
def : InstRW<[
    P9_LS_4C, P9_ALU_2C,
    IP_AGEN_1C, IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs (instregex "LF(DU|DUX)$"))>;
1171
// Crypto Instructions
1173
// 6 cycle CY operation. There is a single CY unit per CPU, so a whole
// superslice is consumed: both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[
    P9_CY_6C,
    IP_EXECO_1C, IP_EXECE_1C,
    DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs
    (instregex "VPMSUM(B|H|W|D)$"),
    (instregex "V(N)?CIPHER(LAST)?$"),
    VSBOX)>;
Stefan Pintilie626b6512018-02-23 20:37:10 +00001183
// Branch Instructions
1185
// Two cycle branch: the branch resource plus 2 dispatches.
def : InstRW<[
    P9_BR_2C,
    DISP_1C, DISP_1C
  ],
  (instrs
    // Branch families selected by regular expression.
    (instregex "BCCCTR(L)?(8)?$"),
    (instregex "BCCL(A|R|RL)?$"),
    (instregex "BCCTR(L)?(8)?(n)?$"),
    (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
    (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
    (instregex "BL(_TLS)?$"),
    (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
    (instregex "BLA(8|8_NOP)?$"),
    (instregex "BLR(8|L)?$"),
    (instregex "TAILB(A)?(8)?$"),
    (instregex "TAILBCTR(8)?$"),
    (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
    (instregex "BCLR(L)?(n)?$"),
    (instregex "BCTR(L)?(8)?$"),
    // Individually named instructions.
    B, BA,
    BC, BCC, BCCA,
    BCL, BCLalways, BCLn,
    BCTRL8_LDinto_toc,
    BCn,
    CTRL_DEP)>;
1215
// Five cycle branch combined with a 2 cycle ALU op.
// The two operations run one after the other, never in parallel.
def : InstRW<[
    P9_BROpAndALUOp_7C,
    IP_EXEC_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs ADDPCIS)>;
1223
// Special Extracted Instructions
1225
// Atomic Load.
// Five LS resources (two 1 cycle, three 4 cycle), two EXEC pipelines, five
// AGEN pipelines and 12 dispatches.
def : InstRW<[
    P9_LS_1C, P9_LS_1C,
    P9_LS_4C, P9_LS_4C, P9_LS_4C,
    IP_EXEC_1C, IP_EXEC_1C,
    IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
    DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C
  ],
  (instrs LDAT)>;
1235
// Signal Processing Engine (SPE) instructions.
// Power 9 does not support these, so they get no resources and the record is
// flagged as Unsupported.
def : InstRW<[],
  (instrs
    // Individually named instructions.
    BRINC,
    EVABS,
    EVEQV,
    EVMRA,
    EVNAND,
    EVNEG,
    // Instruction families selected by regular expression.
    (instregex "EVADD(I)?W$"),
    (instregex "EVADD(SM|SS|UM|US)IAAW$"),
    (instregex "EVAND(C)?$"),
    (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
    (instregex "EVCNTL(S|Z)W$"),
    (instregex "EVDIVW(S|U)$"),
    (instregex "EVEXTS(B|H)$"),
    (instregex "EVLD(H|W|D)(X)?$"),
    (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
    (instregex "EVLWHE(X)?$"),
    (instregex "EVLWHO(S|U)(X)?$"),
    (instregex "EVLW(H|W)SPLAT(X)?$"),
    (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
    (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
    (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
    (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
    (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
    (instregex "EVMWHUMI(A)?$"),
    (instregex "EVMWLS(M|S)IA(A|N)W$"),
    (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
    (instregex "EVMWSSF(A|AA|AN)?$"),
    (instregex "EVMWUMI(A|AA|AN)?$"),
    (instregex "EV(N|X)?OR(C)?$"),
    (instregex "EVR(LW|LWI|NDW)$"),
    (instregex "EVSLW(I)?$"),
    (instregex "EVSPLAT(F)?I$"),
    (instregex "EVSRW(I)?(S|U)$"),
    (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
    (instregex "EVSUBF(S|U)(M|S)IAAW$"),
    (instregex "EVSUB(I)?FW$")
  )> {
  let Unsupported = 1;
}
Stefan Pintiliee894e0f2018-03-01 16:16:08 +00001281
// General instructions that have no scheduling support.
// They get no resources and the record is flagged as Unsupported.
def : InstRW<[],
  (instrs
    // Instruction families selected by regular expression.
    (instregex "(H)?RFI(D)?$"),
    (instregex "DSS(ALL)?$"),
    (instregex "DST(ST)?(T)?(64)?$"),
    (instregex "ICBL(C|Q)$"),
    // Individually named instructions.
    ATTN,
    CLRBHRB,
    MFBHRBE,
    NAP,
    RFCI,
    RFDI,
    RFMCI,
    SC,
    WAIT,
    DCBA,
    DCBI,
    DCCCI,
    ICCCI,
    LBEPX
  )> {
  let Unsupported = 1;
}