blob: 56086494a632747e8f6b81990bd793e765e9d18c [file] [log] [blame]
//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Haswell to support instruction
// scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
14
// Top-level machine model record for Haswell.
def HaswellModel : SchedMachineModel {
  // Every x86 instruction is treated as one micro-op in this model, and the
  // hardware decodes four instructions per cycle.
  let IssueWidth = 4;

  // Sized after the reorder buffer.
  let MicroOpBufferSize = 192;

  // Derived from the loop-stream detector (LSD) queue size together with
  // benchmarking data.
  let LoopMicroOpBufferSize = 50;

  let LoadLatency = 4;
  let MispredictPenalty = 16;

  // FIXME: SSE4 and AVX are not implemented. Marking the model incomplete
  // lets the scheduler assign a default model to opcodes it does not
  // recognize.
  let CompleteModel = 0;
}
30
31let SchedModel = HaswellModel in {
32
// Haswell can issue micro-ops to 8 different ports in one cycle:
//  - Ports 0, 1, 5 and 6 handle all computation.
//  - Ports 2 and 3 are identical; they handle loads and the address half of
//    stores.
//  - Port 4 gets the data half of stores. Store data may be available later
//    than the store address, but store latency is not modeled, so that
//    difference is ignored.
//  - Port 7 can handle address calculations.
def HWPort0 : ProcResource<1>;  // computation
def HWPort1 : ProcResource<1>;  // computation
def HWPort2 : ProcResource<1>;  // load / store address
def HWPort3 : ProcResource<1>;  // load / store address
def HWPort4 : ProcResource<1>;  // store data
def HWPort5 : ProcResource<1>;  // computation
def HWPort6 : ProcResource<1>;  // computation
def HWPort7 : ProcResource<1>;  // address calculation
49
// Port groups: a micro-op assigned to a group may issue on any member port.
// The digits in each name list the member ports.

// Load / store-address ports.
def HWPort23   : ProcResGroup<[HWPort2, HWPort3]>;
def HWPort237  : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;

// Computation ports.
def HWPort05   : ProcResGroup<[HWPort0, HWPort5]>;
def HWPort06   : ProcResGroup<[HWPort0, HWPort6]>;
def HWPort15   : ProcResGroup<[HWPort1, HWPort5]>;
def HWPort16   : ProcResGroup<[HWPort1, HWPort6]>;
def HWPort015  : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
def HWPort056  : ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
def HWPort0156 : ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
60
// Unified scheduler: 60 entries, feeding every issue port.
def HWPortAny : ProcResGroup<[HWPort0, HWPort1, HWPort2, HWPort3,
                              HWPort4, HWPort5, HWPort6, HWPort7]> {
  let BufferSize = 60;
}

// The integer divider; division is issued on port 0.
def HWDivider : ProcResource<1>;
Nadav Roteme7b6a8a2013-03-28 22:34:46 +000069
// A load takes 4 cycles, so a register read as ReadAfterLd does not have to
// be ready until 4 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 4>;
73
// Defines resource usage for a SchedWrite and its folded-load twin at once.
//
// Many SchedWrites come in pairs, with and without a folded load.
// Instructions with a folded load are usually micro-fused, so they show up as
// two micro-ops only once queued in the reservation station.
//
//   SchedRW - the foldable write; SchedRW.Folded is its load variant.
//   ExePort - the resource that executes the operation.
//   Lat     - latency of the register form.
multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
                          ProcResourceKind ExePort,
                          int Lat> {
  // Register form: one cycle on ExePort.
  def : WriteRes<SchedRW, [ExePort]> {
    let Latency = Lat;
  }

  // Memory form: an additional cycle on port 2/3, and 4 more cycles of
  // latency.
  def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> {
    let Latency = !add(Lat, 4);
  }
}
91
// Folded store (read-modify-write): one cycle on port 4 for the store data.
// No extra port 2/3 cycle is charged for recomputing the address.
def : WriteRes<WriteRMW, [HWPort4]>;

// Plain store: the address goes to one of ports 2/3/7, the data to port 4.
def : WriteRes<WriteStore, [HWPort237, HWPort4]>;

// Plain load: one of ports 2/3, 4 cycles.
def : WriteRes<WriteLoad, [HWPort23]> {
  let Latency = 4;
}

// Register move: any of ports 0/1/5/6.
def : WriteRes<WriteMove, [HWPort0156]>;

// WriteZero is given no resources.
def : WriteRes<WriteZero, []>;
102
// Scalar integer operations (register form plus folded-load form).
defm : HWWriteResPair<WriteALU,   HWPort0156, 1>;
defm : HWWriteResPair<WriteIMul,  HWPort1,    3>;
// WriteIMulH: 3 cycles of latency and no resources of its own.
def  : WriteRes<WriteIMulH, []> {
  let Latency = 3;
}
defm : HWWriteResPair<WriteShift, HWPort06,   1>;
defm : HWWriteResPair<WriteJump,  HWPort06,   1>;
Nadav Roteme7b6a8a2013-03-28 22:34:46 +0000108
// Models simple LEAs, i.e. those with one or two input operands.
// Complex LEAs can execute only on port 1 and need two cycles on that port
// to read all of their inputs; that case is not modeled.
def : WriteRes<WriteLEA, [HWPort15]>;
113
// Integer division. This is a rough estimate: the real latency depends on
// the dividend.
// Register form: 1 cycle on port 0, 10 cycles on the divider.
def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> {
  let ResourceCycles = [1, 10];
  let Latency = 25;
}
// Folded-load form: one more cycle on port 2/3 and 4 more cycles of latency.
def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> {
  let ResourceCycles = [1, 1, 10];
  let Latency = 29;
}
123
// Floating point, scalar and vector.
defm : HWWriteResPair<WriteFAdd,        HWPort1,   3>;
defm : HWWriteResPair<WriteFMul,        HWPort0,   5>;
defm : HWWriteResPair<WriteFDiv,        HWPort0,   12>; // 10-14 cycles.
defm : HWWriteResPair<WriteFRcp,        HWPort0,   5>;
defm : HWWriteResPair<WriteFSqrt,       HWPort0,   15>;
defm : HWWriteResPair<WriteCvtF2I,      HWPort1,   3>;
defm : HWWriteResPair<WriteCvtI2F,      HWPort1,   4>;
defm : HWWriteResPair<WriteCvtF2F,      HWPort1,   3>;
defm : HWWriteResPair<WriteFShuffle,    HWPort5,   1>;
defm : HWWriteResPair<WriteFBlend,      HWPort015, 1>;
defm : HWWriteResPair<WriteFShuffle256, HWPort5,   3>;

// Variable FP blend: two cycles on port 5; the load form adds a port 2/3
// cycle and 4 cycles of latency.
def : WriteRes<WriteFVarBlend, [HWPort5]> {
  let ResourceCycles = [2];
  let Latency = 2;
}
def : WriteRes<WriteFVarBlendLd, [HWPort5, HWPort23]> {
  let ResourceCycles = [2, 1];
  let Latency = 6;
}
Nadav Roteme7b6a8a2013-03-28 22:34:46 +0000145
// Vector integer operations.
defm : HWWriteResPair<WriteVecShift,   HWPort0,   1>;
defm : HWWriteResPair<WriteVecLogic,   HWPort015, 1>;
defm : HWWriteResPair<WriteVecALU,     HWPort15,  1>;
defm : HWWriteResPair<WriteVecIMul,    HWPort0,   5>;
defm : HWWriteResPair<WriteShuffle,    HWPort5,   1>;
defm : HWWriteResPair<WriteBlend,      HWPort15,  1>;
defm : HWWriteResPair<WriteShuffle256, HWPort5,   3>;

// Variable blend: two cycles on port 5.
def : WriteRes<WriteVarBlend, [HWPort5]> {
  let ResourceCycles = [2];
  let Latency = 2;
}
def : WriteRes<WriteVarBlendLd, [HWPort5, HWPort23]> {
  let ResourceCycles = [2, 1];
  let Latency = 6;
}

// Variable vector shift: two cycles on port 0, one on port 5.
def : WriteRes<WriteVarVecShift, [HWPort0, HWPort5]> {
  let ResourceCycles = [2, 1];
  let Latency = 2;
}
def : WriteRes<WriteVarVecShiftLd, [HWPort0, HWPort5, HWPort23]> {
  let ResourceCycles = [2, 1, 1];
  let Latency = 6;
}

// MPSAD: one cycle on port 0, two on port 5.
// NOTE(review): unlike the pairs above, the load form keeps the register-form
// latency (6) instead of adding the 4-cycle load latency - confirm intent.
def : WriteRes<WriteMPSAD, [HWPort0, HWPort5]> {
  let ResourceCycles = [1, 2];
  let Latency = 6;
}
def : WriteRes<WriteMPSADLd, [HWPort23, HWPort0, HWPort5]> {
  let ResourceCycles = [1, 1, 2];
  let Latency = 6;
}
181
// String instructions.
// In this group the load forms carry the same latency as the register forms.
// NOTE(review): the register and load forms of the explicit-length compares
// name different port lists (split HWPort0/HWPort5 vs. the HWPort05 group) -
// confirm this is intentional.

// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [HWPort0]> {
  let ResourceCycles = [3];
  let Latency = 10;
}
def : WriteRes<WritePCmpIStrMLd, [HWPort0, HWPort23]> {
  let ResourceCycles = [3, 1];
  let Latency = 10;
}

// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [HWPort0, HWPort16, HWPort5]> {
  let ResourceCycles = [3, 2, 4];
  let Latency = 10;
}
def : WriteRes<WritePCmpEStrMLd, [HWPort05, HWPort16, HWPort23]> {
  let ResourceCycles = [6, 2, 1];
  let Latency = 10;
}

// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [HWPort0]> {
  let ResourceCycles = [3];
  let Latency = 11;
}
def : WriteRes<WritePCmpIStrILd, [HWPort0, HWPort23]> {
  let ResourceCycles = [3, 1];
  let Latency = 11;
}

// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [HWPort05, HWPort16]> {
  let ResourceCycles = [6, 2];
  let Latency = 11;
}
def : WriteRes<WritePCmpEStrILd, [HWPort0, HWPort16, HWPort5, HWPort23]> {
  let ResourceCycles = [3, 2, 2, 1];
  let Latency = 11;
}
222
// AES instructions. Each load form adds a port 2/3 cycle and keeps the
// latency of its register form.

// Encrypt / decrypt round.
def : WriteRes<WriteAESDecEnc, [HWPort5]> {
  let ResourceCycles = [1];
  let Latency = 7;
}
def : WriteRes<WriteAESDecEncLd, [HWPort5, HWPort23]> {
  let ResourceCycles = [1, 1];
  let Latency = 7;
}

// Inverse mix columns.
def : WriteRes<WriteAESIMC, [HWPort5]> {
  let ResourceCycles = [2];
  let Latency = 14;
}
def : WriteRes<WriteAESIMCLd, [HWPort5, HWPort23]> {
  let ResourceCycles = [2, 1];
  let Latency = 14;
}

// Key generation assist.
def : WriteRes<WriteAESKeyGen, [HWPort0, HWPort5]> {
  let ResourceCycles = [2, 8];
  let Latency = 10;
}
def : WriteRes<WriteAESKeyGenLd, [HWPort0, HWPort5, HWPort23]> {
  let ResourceCycles = [2, 7, 1];
  let Latency = 10;
}
250
// Carry-less multiplication: two cycles on port 0, one on port 5; the load
// form adds a port 2/3 cycle at the same latency.
def : WriteRes<WriteCLMul, [HWPort0, HWPort5]> {
  let ResourceCycles = [2, 1];
  let Latency = 7;
}
def : WriteRes<WriteCLMulLd, [HWPort0, HWPort5, HWPort23]> {
  let ResourceCycles = [2, 1, 1];
  let Latency = 7;
}
Nadav Roteme7b6a8a2013-03-28 22:34:46 +0000260
// System and microcoded instructions are both given a latency of 100.
def : WriteRes<WriteSystem, [HWPort0156]> {
  let Latency = 100;
}
def : WriteRes<WriteMicrocoded, [HWPort0156]> {
  let Latency = 100;
}
// Fences occupy a port 2/3 cycle and a port 4 cycle.
def : WriteRes<WriteFence, [HWPort23, HWPort4]>;
// NOPs consume no resources.
def : WriteRes<WriteNop, []>;
Quentin Colombet35d37b72014-08-18 17:55:08 +0000265
266//================ Exceptions ================//
267
268//-- Specific Scheduling Models --//
// Naming used by these helper writes: <N>P<ports> is N cycles on that port
// group, _Lat<k> is a latency of k, and an Ld suffix marks the folded-load
// form (one extra cycle on port 2/3, 4 more cycles of latency).

// One cycle on port 1, latency 3.
def WriteP1_Lat3 : SchedWriteRes<[HWPort1]> {
  let Latency = 3;
}
def WriteP1_Lat3Ld : SchedWriteRes<[HWPort1, HWPort23]> {
  let Latency = 7;
}

// Two cycles on ports 0/1/5/6, latency 2.
def Write2P0156_Lat2 : SchedWriteRes<[HWPort0156]> {
  let ResourceCycles = [2];
  let Latency = 2;
}
def Write2P0156_Lat2Ld : SchedWriteRes<[HWPort0156, HWPort23]> {
  let ResourceCycles = [2, 1];
  let Latency = 6;
}
284
// Two cycles on ports 2/3/7 plus one cycle on port 4.
def Write2P237_P4 : SchedWriteRes<[HWPort237, HWPort4]> {
  let ResourceCycles = [2, 1];
  let Latency = 1;
}

// One cycle on ports 0/6, all other fields left at their defaults.
def WriteP06 : SchedWriteRes<[HWPort06]>;

// Two micro-ops, two cycles on ports 0/6.
def Write2P06 : SchedWriteRes<[HWPort06]> {
  let NumMicroOps = 2;
  let ResourceCycles = [2];
  let Latency = 1;
}
297
// One cycle on ports 1/5, latency left at the default.
def WriteP15 : SchedWriteRes<[HWPort15]>;

// Folded-load form of WriteP15: one more cycle on port 2/3.
// Latency is the register-form latency (1) plus the 4-cycle load latency,
// matching HWWriteResPair and the other *Ld helper writes in this file
// (WriteP1_Lat3Ld = 3 + 4, Write2P0156_Lat2Ld = 2 + 4). The previous value
// of 4 covered the load alone and left no cycle for the ALU operation.
def WriteP15Ld : SchedWriteRes<[HWPort15, HWPort23]> {
  let Latency = 5;
}
302
// Three micro-ops, three cycles on ports 0/6, latency 2.
def Write3P06_Lat2 : SchedWriteRes<[HWPort06]> {
  let NumMicroOps = 3;
  let ResourceCycles = [3];
  let Latency = 2;
}

// Read-modify-write helpers: N cycles on ports 0/1/5/6, two cycles on ports
// 2/3/7 and one cycle on port 4. They differ only in N.

// N = 1.
def WriteP0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
  let ResourceCycles = [1, 2, 1];
  let Latency = 1;
}

// N = 2.
def Write2P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
  let ResourceCycles = [2, 2, 1];
  let Latency = 1;
}

// N = 3.
def Write3P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
  let ResourceCycles = [3, 2, 1];
  let Latency = 1;
}
323
Quentin Colombet35d37b72014-08-18 17:55:08 +0000324// Notation:
325// - r: register.
326// - mm: 64 bit mmx register.
327// - x = 128 bit xmm register.
328// - (x)mm = mmx or xmm register.
329// - y = 256 bit ymm register.
330// - v = any vector register.
331// - m = memory.
332
333//=== Integer Instructions ===//
334//-- Move instructions --//
335
// MOV r16,m: scheduled as an ALU operation with a folded load.
def : InstRW<[WriteALULd], (instregex "MOV16rm")>;

// MOVSX / MOVZX r,m: scheduled as a plain load.
def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;

// CMOVcc r,r.
def : InstRW<[Write2P0156_Lat2],
             (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>;
// CMOVcc r,m.
def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd],
             (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>;
351
// XCHG r,r: three cycles on ports 0/1/5/6, latency 2.
def WriteXCHG : SchedWriteRes<[HWPort0156]> {
  let ResourceCycles = [3];
  let Latency = 2;
}
def : InstRW<[WriteXCHG],
             (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;

// XCHG r,m: 8 micro-ops, latency 21; no port usage is modeled.
def WriteXCHGrm : SchedWriteRes<[]> {
  let NumMicroOps = 8;
  let Latency = 21;
}
def : InstRW<[WriteXCHGrm], (instregex "XCHG(8|16|32|64)rm")>;

// XLAT: 3 micro-ops, latency 7; no port usage is modeled.
def WriteXLAT : SchedWriteRes<[]> {
  let NumMicroOps = 3;
  let Latency = 7;
}
def : InstRW<[WriteXLAT], (instregex "XLAT")>;
374
// PUSH m.
def : InstRW<[Write2P237_P4], (instregex "PUSH(16|32)rmm")>;

// PUSHF: 4 micro-ops, one cycle on each listed resource.
def WritePushF : SchedWriteRes<[HWPort1, HWPort4, HWPort237, HWPort06]> {
  let NumMicroOps = 4;
}
def : InstRW<[WritePushF], (instregex "PUSHF(16|32)")>;

// PUSHA: 19 micro-ops; no port usage is modeled.
def WritePushA : SchedWriteRes<[]> {
  let NumMicroOps = 19;
}
def : InstRW<[WritePushA], (instregex "PUSHA(16|32)")>;

// POP m.
def : InstRW<[Write2P237_P4], (instregex "POP(16|32)rmm")>;

// POPF: 9 micro-ops; no port usage is modeled.
def WritePopF : SchedWriteRes<[]> {
  let NumMicroOps = 9;
}
def : InstRW<[WritePopF], (instregex "POPF(16|32)")>;

// POPA: 18 micro-ops; no port usage is modeled.
def WritePopA : SchedWriteRes<[]> {
  let NumMicroOps = 18;
}
def : InstRW<[WritePopA], (instregex "POPA(16|32)")>;
406
// LAHF / SAHF.
def : InstRW<[WriteP06], (instregex "(S|L)AHF")>;

// BSWAP r32: one cycle on ports 1/5.
def WriteBSwap32 : SchedWriteRes<[HWPort15]>;
def : InstRW<[WriteBSwap32], (instregex "BSWAP32r")>;

// BSWAP r64: 2 micro-ops, one on ports 0/6 and one on ports 1/5.
def WriteBSwap64 : SchedWriteRes<[HWPort06, HWPort15]> {
  let NumMicroOps = 2;
}
def : InstRW<[WriteBSwap64], (instregex "BSWAP64r")>;
420
// MOVBE r16,m16 and r64,m64.
def : InstRW<[Write2P0156_Lat2Ld], (instregex "MOVBE(16|64)rm")>;

// MOVBE r32,m32: 2 micro-ops (ports 1/5 and ports 2/3).
def WriteMoveBE32rm : SchedWriteRes<[HWPort15, HWPort23]> {
  let NumMicroOps = 2;
}
def : InstRW<[WriteMoveBE32rm], (instregex "MOVBE32rm")>;

// MOVBE m16,r16: 3 micro-ops (ports 0/6, store address, store data).
def WriteMoveBE16mr : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteMoveBE16mr], (instregex "MOVBE16mr")>;

// MOVBE m32,r32: 3 micro-ops (ports 1/5, store address, store data).
def WriteMoveBE32mr : SchedWriteRes<[HWPort15, HWPort237, HWPort4]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteMoveBE32mr], (instregex "MOVBE32mr")>;

// MOVBE m64,r64: 4 micro-ops (ports 0/6, ports 1/5, store address, store
// data).
def WriteMoveBE64mr : SchedWriteRes<[HWPort06, HWPort15, HWPort237, HWPort4]> {
  let NumMicroOps = 4;
}
def : InstRW<[WriteMoveBE64mr], (instregex "MOVBE64mr")>;
448
Quentin Colombetdf260592014-08-18 17:55:11 +0000449//-- Arithmetic instructions --//
450
// ADD / SUB with a memory destination (m,r and m,i).
def : InstRW<[Write2P0156_2P237_P4],
             (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
                        "(ADD|SUB)(8|16|32|64)mi8",
                        "(ADD|SUB)64mi32")>;

// ADC / SBB with a register destination (r,r and r,i).
def : InstRW<[Write2P0156_Lat2],
             (instregex "(ADC|SBB)(8|16|32|64)r(r|i)",
                        "(ADC|SBB)(16|32|64)ri8",
                        "(ADC|SBB)64ri32",
                        "(ADC|SBB)(8|16|32|64)rr_REV")>;

// ADC / SBB r,m.
def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd],
             (instregex "(ADC|SBB)(8|16|32|64)rm")>;

// ADC / SBB with a memory destination (m,r and m,i).
def : InstRW<[Write3P0156_2P237_P4],
             (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
                        "(ADC|SBB)(16|32|64)mi8",
                        "(ADC|SBB)64mi32")>;

// INC / DEC / NOT / NEG with a memory operand.
// NOTE(review): the second pattern has no separator between "64" and the
// operand size; verify it against the actual instruction names.
def : InstRW<[WriteP0156_2P237_P4],
             (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m",
                        "(INC|DEC)64(16|32)m")>;
478
// MUL / IMUL. Each memory form adds a port 2/3 cycle, one micro-op and
// 4 cycles of latency to its register form.

// r16.
def WriteMul16 : SchedWriteRes<[HWPort1, HWPort0156]> {
  let NumMicroOps = 4;
  let Latency = 4;
}
def : InstRW<[WriteMul16], (instregex "IMUL16r", "MUL16r")>;

// m16.
def WriteMul16Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
  let NumMicroOps = 5;
  let Latency = 8;
}
def : InstRW<[WriteMul16Ld], (instregex "IMUL16m", "MUL16m")>;

// r32.
def WriteMul32 : SchedWriteRes<[HWPort1, HWPort0156]> {
  let NumMicroOps = 3;
  let Latency = 4;
}
def : InstRW<[WriteMul32], (instregex "IMUL32r", "MUL32r")>;

// m32.
def WriteMul32Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
  let NumMicroOps = 4;
  let Latency = 8;
}
def : InstRW<[WriteMul32Ld], (instregex "IMUL32m", "MUL32m")>;

// r64.
def WriteMul64 : SchedWriteRes<[HWPort1, HWPort6]> {
  let NumMicroOps = 2;
  let Latency = 3;
}
def : InstRW<[WriteMul64], (instregex "IMUL64r", "MUL64r")>;

// m64.
def WriteMul64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
  let NumMicroOps = 3;
  let Latency = 7;
}
def : InstRW<[WriteMul64Ld], (instregex "IMUL64m", "MUL64m")>;

// r16,r16,i.
def WriteMul16rri : SchedWriteRes<[HWPort1, HWPort0156]> {
  let NumMicroOps = 2;
  let Latency = 4;
}
def : InstRW<[WriteMul16rri], (instregex "IMUL16rri", "IMUL16rri8")>;

// r16,m16,i.
def WriteMul16rmi : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
  let NumMicroOps = 3;
  let Latency = 8;
}
def : InstRW<[WriteMul16rmi], (instregex "IMUL16rmi", "IMUL16rmi8")>;
535
// MULX. Each memory form adds a port 2/3 cycle, one micro-op and 4 cycles of
// latency to its register form.

// r32,r32,r32: one cycle on port 1, two on ports 0/5/6.
def WriteMulX32 : SchedWriteRes<[HWPort1, HWPort056]> {
  let NumMicroOps = 3;
  let ResourceCycles = [1, 2];
  let Latency = 4;
}
def : InstRW<[WriteMulX32], (instregex "MULX32rr")>;

// r32,r32,m32.
def WriteMulX32Ld : SchedWriteRes<[HWPort1, HWPort056, HWPort23]> {
  let NumMicroOps = 4;
  let ResourceCycles = [1, 2, 1];
  let Latency = 8;
}
def : InstRW<[WriteMulX32Ld], (instregex "MULX32rm")>;

// r64,r64,r64.
def WriteMulX64 : SchedWriteRes<[HWPort1, HWPort6]> {
  let NumMicroOps = 2;
  let Latency = 4;
}
def : InstRW<[WriteMulX64], (instregex "MULX64rr")>;

// r64,r64,m64.
def WriteMulX64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
  let NumMicroOps = 3;
  let Latency = 8;
}
def : InstRW<[WriteMulX64Ld], (instregex "MULX64rm")>;
566
// DIV (unsigned divide), register forms. Each write names ports 0, 1, 5 and
// 6 individually; only the micro-op count and latency differ by operand size.

// r8.
def WriteDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let NumMicroOps = 9;
  let Latency = 22;
}
def : InstRW<[WriteDiv8], (instregex "DIV8r")>;

// r16.
def WriteDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let NumMicroOps = 10;
  let Latency = 23;
}
def : InstRW<[WriteDiv16], (instregex "DIV16r")>;

// r32.
def WriteDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let NumMicroOps = 10;
  let Latency = 22;
}
def : InstRW<[WriteDiv32], (instregex "DIV32r")>;

// r64.
def WriteDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let NumMicroOps = 36;
  let Latency = 32;
}
def : InstRW<[WriteDiv64], (instregex "DIV64r")>;
595
// IDIV (signed divide), register forms. Each write names ports 0, 1, 5 and
// 6 individually; only the micro-op count and latency differ by operand size.

// r8.
def WriteIDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let NumMicroOps = 9;
  let Latency = 23;
}
def : InstRW<[WriteIDiv8], (instregex "IDIV8r")>;

// r16.
def WriteIDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let NumMicroOps = 10;
  let Latency = 23;
}
def : InstRW<[WriteIDiv16], (instregex "IDIV16r")>;

// r32.
def WriteIDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let NumMicroOps = 9;
  let Latency = 22;
}
def : InstRW<[WriteIDiv32], (instregex "IDIV32r")>;

// r64.
def WriteIDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let NumMicroOps = 59;
  let Latency = 39;
}
def : InstRW<[WriteIDiv64], (instregex "IDIV64r")>;
624
Quentin Colombetfb887b12014-08-18 17:55:13 +0000625//-- Logic instructions --//
626
// AND / OR / XOR with a memory destination (m,r and m,i).
def : InstRW<[Write2P0156_2P237_P4],
             (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
                        "(AND|OR|XOR)(8|16|32|64)mi8",
                        "(AND|OR|XOR)64mi32")>;

// SHR / SHL / SAR m,i: 4 micro-ops.
def WriteShiftRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
  let ResourceCycles = [2, 1, 1];
  let NumMicroOps = 4;
}
def : InstRW<[WriteShiftRMW], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;

// SHR / SHL / SAR r,cl.
def : InstRW<[Write3P06_Lat2], (instregex "S(A|H)(R|L)(8|16|32|64)rCL")>;

// SHR / SHL / SAR m,cl: 6 micro-ops.
def WriteShiftClLdRMW : SchedWriteRes<[HWPort06, HWPort23, HWPort4]> {
  let ResourceCycles = [3, 2, 1];
  let NumMicroOps = 6;
}
def : InstRW<[WriteShiftClLdRMW], (instregex "S(A|H)(R|L)(8|16|32|64)mCL")>;
650
// ROR / ROL r,1.
def : InstRW<[Write2P06], (instregex "RO(R|L)(8|16|32|64)r1")>;

// ROR / ROL m,i: 5 micro-ops.
def WriteRotateRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
  let ResourceCycles = [2, 2, 1];
  let NumMicroOps = 5;
}
def : InstRW<[WriteRotateRMW], (instregex "RO(R|L)(8|16|32|64)mi")>;

// ROR / ROL r,cl.
def : InstRW<[Write3P06_Lat2], (instregex "RO(R|L)(8|16|32|64)rCL")>;

// ROR / ROL m,cl: 6 micro-ops; no port usage is modeled.
def WriteRotateRMWCL : SchedWriteRes<[]> {
  let NumMicroOps = 6;
}
def : InstRW<[WriteRotateRMWCL], (instregex "RO(R|L)(8|16|32|64)mCL")>;
670
// RCR / RCL r,1: 3 micro-ops, two cycles on ports 0/6 and one on ports
// 0/1/5/6.
def WriteRCr1 : SchedWriteRes<[HWPort06, HWPort0156]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
  let Latency = 2;
}
def : InstRW<[WriteRCr1], (instregex "RC(R|L)(8|16|32|64)r1")>;

// RCR / RCL m,1: 6 micro-ops; no port usage is modeled.
def WriteRCm1 : SchedWriteRes<[]> {
  let NumMicroOps = 6;
}
def : InstRW<[WriteRCm1], (instregex "RC(R|L)(8|16|32|64)m1")>;

// RCR / RCL r,i and r,cl: 8 micro-ops, latency 6.
def WriteRCri : SchedWriteRes<[HWPort0156]> {
  let NumMicroOps = 8;
  let Latency = 6;
}
def : InstRW<[WriteRCri], (instregex "RC(R|L)(8|16|32|64)r(i|CL)")>;

// RCR / RCL m,i and m,cl: 11 micro-ops; no port usage is modeled.
def WriteRCmi : SchedWriteRes<[]> {
  let NumMicroOps = 11;
}
def : InstRW<[WriteRCmi], (instregex "RC(R|L)(8|16|32|64)m(i|CL)")>;
698
// SHRD / SHLD r,r,i: one cycle on port 1, latency 3.
def WriteShDrr : SchedWriteRes<[HWPort1]> {
  let Latency = 3;
}
def : InstRW<[WriteShDrr], (instregex "SH(R|L)D(16|32|64)rri8")>;

// SHRD / SHLD m,r,i: 5 micro-ops; no port usage is modeled.
def WriteShDmr : SchedWriteRes<[]> {
  let NumMicroOps = 5;
}
def : InstRW<[WriteShDmr], (instregex "SH(R|L)D(16|32|64)mri8")>;

// SHLD r,r,cl: 4 micro-ops, latency 3.
def WriteShlDCL : SchedWriteRes<[HWPort0156]> {
  let NumMicroOps = 4;
  let Latency = 3;
}
def : InstRW<[WriteShlDCL], (instregex "SHLD(16|32|64)rrCL")>;

// SHRD r,r,cl: 4 micro-ops, latency 4.
def WriteShrDCL : SchedWriteRes<[HWPort0156]> {
  let NumMicroOps = 4;
  let Latency = 4;
}
def : InstRW<[WriteShrDCL], (instregex "SHRD(16|32|64)rrCL")>;

// SHRD / SHLD m,r,cl: 7 micro-ops; no port usage is modeled.
def WriteShDmrCL : SchedWriteRes<[]> {
  let NumMicroOps = 7;
}
def : InstRW<[WriteShDmrCL], (instregex "SH(R|L)D(16|32|64)mrCL")>;
731
// BT r,r and r,i.
def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>;

// BT m,r: 10 micro-ops; no port usage is modeled.
def WriteBTmr : SchedWriteRes<[]> {
  let NumMicroOps = 10;
}
def : InstRW<[WriteBTmr], (instregex "BT(16|32|64)mr")>;

// BT m,i.
def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;

// BTR / BTS / BTC r,r and r,i.
def : InstRW<[WriteShift], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;

// BTR / BTS / BTC m,r: 11 micro-ops; no port usage is modeled.
def WriteBTRSCmr : SchedWriteRes<[]> {
  let NumMicroOps = 11;
}
def : InstRW<[WriteBTRSCmr], (instregex "BT(R|S|C)(16|32|64)mr")>;

// BTR / BTS / BTC m,i.
def : InstRW<[WriteShiftLd], (instregex "BT(R|S|C)(16|32|64)mi8")>;
757
// BSF / BSR r,r.
def : InstRW<[WriteP1_Lat3], (instregex "BS(R|F)(16|32|64)rr")>;
// BSF / BSR r,m.
def : InstRW<[WriteP1_Lat3Ld], (instregex "BS(R|F)(16|32|64)rm")>;

// SETcc r.
def : InstRW<[WriteShift],
             (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>;

// SETcc m: 3 micro-ops (ports 0/6, store address, store data).
def WriteSetCCm : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteSetCCm],
             (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>;

// CLD / STD: 3 micro-ops.
def WriteCldStd : SchedWriteRes<[HWPort15, HWPort6]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteCldStd], (instregex "STD", "CLD")>;
780
// LZCNT TZCNT.
// The alternation covers only the leading letter, so the pattern matches both
// LZCNT* and TZCNT*. The previous form "(L|TZCNT)" alternated between "L" and
// "TZCNT", so no LZCNT instruction was ever matched.
// r,r.
def : InstRW<[WriteP1_Lat3], (instregex "(L|T)ZCNT(16|32|64)rr")>;
// r,m.
def : InstRW<[WriteP1_Lat3Ld], (instregex "(L|T)ZCNT(16|32|64)rm")>;
786
// ANDN r,r.
def : InstRW<[WriteP15], (instregex "ANDN(32|64)rr")>;
// ANDN r,m.
def : InstRW<[WriteP15Ld], (instregex "ANDN(32|64)rm")>;

// BLSI / BLSMSK / BLSR r,r.
def : InstRW<[WriteP15], (instregex "BLS(I|MSK|R)(32|64)rr")>;
// BLSI / BLSMSK / BLSR r,m.
def : InstRW<[WriteP15Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;

// BEXTR r,r,r.
def : InstRW<[Write2P0156_Lat2], (instregex "BEXTR(32|64)rr")>;
// BEXTR r,m,r.
def : InstRW<[Write2P0156_Lat2Ld], (instregex "BEXTR(32|64)rm")>;

// BZHI r,r,r.
def : InstRW<[WriteP15], (instregex "BZHI(32|64)rr")>;
// BZHI r,m,r.
def : InstRW<[WriteP15Ld], (instregex "BZHI(32|64)rm")>;

// PDEP / PEXT r,r,r.
def : InstRW<[WriteP1_Lat3],
             (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
// PDEP / PEXT r,m,r.
def : InstRW<[WriteP1_Lat3Ld],
             (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
816
Nadav Roteme7b6a8a2013-03-28 22:34:46 +0000817} // SchedModel