blob: 99903006f20902beb44d58f7d7cf47c31918b64d [file] [log] [blame]
Nadav Roteme7b6a8a2013-03-28 22:34:46 +00001//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the machine model for Haswell to support instruction
11// scheduling and other instruction cost heuristics.
12//
13//===----------------------------------------------------------------------===//
14
// Top-level machine model record for Haswell: issue width, buffer sizes and
// the fixed latencies/penalties consumed by the scheduling heuristics.
def HaswellModel : SchedMachineModel {
  // All x86 instructions are modeled as a single micro-op, and the hardware
  // can decode 4 instructions per cycle.
  let IssueWidth = 4;

  // Based on the reorder buffer.
  let MicroOpBufferSize = 192;

  // Based on the LSD (loop-stream detector) queue size and benchmarking data.
  let LoopMicroOpBufferSize = 50;

  let LoadLatency = 4;
  let MispredictPenalty = 16;

  // FIXME: SSE4 and AVX are unimplemented. Leaving the model marked as
  // incomplete allows the scheduler to assign a default model to
  // unrecognized opcodes.
  let CompleteModel = 0;
}
30
31let SchedModel = HaswellModel in {
32
33// Haswell can issue micro-ops to 8 different ports in one cycle.
34
Quentin Colombet9e16c8a2014-01-29 18:26:59 +000035// Ports 0, 1, 5, and 6 handle all computation.
Nadav Roteme7b6a8a2013-03-28 22:34:46 +000036// Port 4 gets the data half of stores. Store data can be available later than
37// the store address, but since we don't model the latency of stores, we can
38// ignore that.
39// Ports 2 and 3 are identical. They handle loads and the address half of
40// stores. Port 7 can handle address calculations.
41def HWPort0 : ProcResource<1>;
42def HWPort1 : ProcResource<1>;
43def HWPort2 : ProcResource<1>;
44def HWPort3 : ProcResource<1>;
45def HWPort4 : ProcResource<1>;
46def HWPort5 : ProcResource<1>;
47def HWPort6 : ProcResource<1>;
48def HWPort7 : ProcResource<1>;
49
50// Many micro-ops are capable of issuing on multiple ports.
Quentin Colombet0bc907e2014-08-18 17:55:26 +000051def HWPort01 : ProcResGroup<[HWPort0, HWPort1]>;
Nadav Roteme7b6a8a2013-03-28 22:34:46 +000052def HWPort23 : ProcResGroup<[HWPort2, HWPort3]>;
53def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
Quentin Colombetf68e0942014-08-18 17:55:36 +000054def HWPort04 : ProcResGroup<[HWPort0, HWPort4]>;
Nadav Roteme7b6a8a2013-03-28 22:34:46 +000055def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>;
Quentin Colombet9e16c8a2014-01-29 18:26:59 +000056def HWPort06 : ProcResGroup<[HWPort0, HWPort6]>;
Nadav Roteme7b6a8a2013-03-28 22:34:46 +000057def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>;
Quentin Colombetca498512014-02-24 19:33:51 +000058def HWPort16 : ProcResGroup<[HWPort1, HWPort6]>;
Quentin Colombetf68e0942014-08-18 17:55:36 +000059def HWPort56: ProcResGroup<[HWPort5, HWPort6]>;
Nadav Roteme7b6a8a2013-03-28 22:34:46 +000060def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
Quentin Colombetdf260592014-08-18 17:55:11 +000061def HWPort056: ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
Nadav Roteme7b6a8a2013-03-28 22:34:46 +000062def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
63
Andrew Trick40c4f382013-06-15 04:50:06 +000064// 60 Entry Unified Scheduler
65def HWPortAny : ProcResGroup<[HWPort0, HWPort1, HWPort2, HWPort3, HWPort4,
66 HWPort5, HWPort6, HWPort7]> {
67 let BufferSize=60;
68}
69
Andrew Tricke1d88cf2013-04-02 01:58:47 +000070// Integer division issued on port 0.
71def HWDivider : ProcResource<1>;
Nadav Roteme7b6a8a2013-03-28 22:34:46 +000072
73// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
74// cycles after the memory operand.
75def : ReadAdvance<ReadAfterLd, 4>;
76
// Most SchedWrites exist in two flavors: a register form and a form with a
// folded load. Folded-load instructions are usually micro-fused, so they only
// show up as two micro-ops once queued in the reservation station.
// HWWriteResPair emits the resource description for both flavors at once.
multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
                          ProcResourceKind ExePort,
                          int Lat> {
  // Register form: a single cycle on ExePort.
  def : WriteRes<SchedRW, [ExePort]> {
    let Latency = Lat;
  }

  // Folded-load form: additionally uses a cycle on port 2/3 and is 4 cycles
  // slower than the register form.
  def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> { let Latency = !add(Lat, 4); }
}
94
95// A folded store needs a cycle on port 4 for the store data, but it does not
96// need an extra port 2/3 cycle to recompute the address.
97def : WriteRes<WriteRMW, [HWPort4]>;
98
Quentin Colombet9e16c8a2014-01-29 18:26:59 +000099// Store_addr on 237.
100// Store_data on 4.
Nadav Roteme7b6a8a2013-03-28 22:34:46 +0000101def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
102def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 4; }
103def : WriteRes<WriteMove, [HWPort0156]>;
104def : WriteRes<WriteZero, []>;
105
106defm : HWWriteResPair<WriteALU, HWPort0156, 1>;
107defm : HWWriteResPair<WriteIMul, HWPort1, 3>;
Andrew Trick7201f4f2013-06-21 18:33:04 +0000108def : WriteRes<WriteIMulH, []> { let Latency = 3; }
Quentin Colombet9e16c8a2014-01-29 18:26:59 +0000109defm : HWWriteResPair<WriteShift, HWPort06, 1>;
110defm : HWWriteResPair<WriteJump, HWPort06, 1>;
Nadav Roteme7b6a8a2013-03-28 22:34:46 +0000111
112// This is for simple LEAs with one or two input operands.
113// The complex ones can only execute on port 1, and they require two cycles on
114// the port to read all inputs. We don't model that.
115def : WriteRes<WriteLEA, [HWPort15]>;
116
// Integer division. These numbers are quite rough: the real latency depends
// on the dividend.
def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> {
  let ResourceCycles = [1, 10];
  let Latency = 25;
}
// Folded-load form: one extra cycle on port 2/3.
def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> {
  let ResourceCycles = [1, 1, 10];
  let Latency = 29;
}
126
127// Scalar and vector floating point.
128defm : HWWriteResPair<WriteFAdd, HWPort1, 3>;
129defm : HWWriteResPair<WriteFMul, HWPort0, 5>;
130defm : HWWriteResPair<WriteFDiv, HWPort0, 12>; // 10-14 cycles.
131defm : HWWriteResPair<WriteFRcp, HWPort0, 5>;
132defm : HWWriteResPair<WriteFSqrt, HWPort0, 15>;
133defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>;
134defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>;
135defm : HWWriteResPair<WriteCvtF2F, HWPort1, 3>;
Quentin Colombetca498512014-02-24 19:33:51 +0000136defm : HWWriteResPair<WriteFShuffle, HWPort5, 1>;
137defm : HWWriteResPair<WriteFBlend, HWPort015, 1>;
138defm : HWWriteResPair<WriteFShuffle256, HWPort5, 3>;
139
140def : WriteRes<WriteFVarBlend, [HWPort5]> {
141 let Latency = 2;
142 let ResourceCycles = [2];
143}
144def : WriteRes<WriteFVarBlendLd, [HWPort5, HWPort23]> {
145 let Latency = 6;
146 let ResourceCycles = [2, 1];
147}
Nadav Roteme7b6a8a2013-03-28 22:34:46 +0000148
149// Vector integer operations.
Quentin Colombet9e16c8a2014-01-29 18:26:59 +0000150defm : HWWriteResPair<WriteVecShift, HWPort0, 1>;
Nadav Roteme7b6a8a2013-03-28 22:34:46 +0000151defm : HWWriteResPair<WriteVecLogic, HWPort015, 1>;
152defm : HWWriteResPair<WriteVecALU, HWPort15, 1>;
153defm : HWWriteResPair<WriteVecIMul, HWPort0, 5>;
Quentin Colombet9e16c8a2014-01-29 18:26:59 +0000154defm : HWWriteResPair<WriteShuffle, HWPort5, 1>;
Quentin Colombetca498512014-02-24 19:33:51 +0000155defm : HWWriteResPair<WriteBlend, HWPort15, 1>;
156defm : HWWriteResPair<WriteShuffle256, HWPort5, 3>;
157
158def : WriteRes<WriteVarBlend, [HWPort5]> {
159 let Latency = 2;
160 let ResourceCycles = [2];
161}
162def : WriteRes<WriteVarBlendLd, [HWPort5, HWPort23]> {
163 let Latency = 6;
164 let ResourceCycles = [2, 1];
165}
166
167def : WriteRes<WriteVarVecShift, [HWPort0, HWPort5]> {
168 let Latency = 2;
169 let ResourceCycles = [2, 1];
170}
171def : WriteRes<WriteVarVecShiftLd, [HWPort0, HWPort5, HWPort23]> {
172 let Latency = 6;
173 let ResourceCycles = [2, 1, 1];
174}
175
176def : WriteRes<WriteMPSAD, [HWPort0, HWPort5]> {
177 let Latency = 6;
178 let ResourceCycles = [1, 2];
179}
180def : WriteRes<WriteMPSADLd, [HWPort23, HWPort0, HWPort5]> {
181 let Latency = 6;
182 let ResourceCycles = [1, 1, 2];
183}
184
185// String instructions.
186// Packed Compare Implicit Length Strings, Return Mask
187def : WriteRes<WritePCmpIStrM, [HWPort0]> {
188 let Latency = 10;
189 let ResourceCycles = [3];
190}
191def : WriteRes<WritePCmpIStrMLd, [HWPort0, HWPort23]> {
192 let Latency = 10;
193 let ResourceCycles = [3, 1];
194}
195
196// Packed Compare Explicit Length Strings, Return Mask
197def : WriteRes<WritePCmpEStrM, [HWPort0, HWPort16, HWPort5]> {
198 let Latency = 10;
199 let ResourceCycles = [3, 2, 4];
200}
201def : WriteRes<WritePCmpEStrMLd, [HWPort05, HWPort16, HWPort23]> {
202 let Latency = 10;
203 let ResourceCycles = [6, 2, 1];
204}
205
206// Packed Compare Implicit Length Strings, Return Index
207def : WriteRes<WritePCmpIStrI, [HWPort0]> {
208 let Latency = 11;
209 let ResourceCycles = [3];
210}
211def : WriteRes<WritePCmpIStrILd, [HWPort0, HWPort23]> {
212 let Latency = 11;
213 let ResourceCycles = [3, 1];
214}
215
216// Packed Compare Explicit Length Strings, Return Index
217def : WriteRes<WritePCmpEStrI, [HWPort05, HWPort16]> {
218 let Latency = 11;
219 let ResourceCycles = [6, 2];
220}
221def : WriteRes<WritePCmpEStrILd, [HWPort0, HWPort16, HWPort5, HWPort23]> {
222 let Latency = 11;
223 let ResourceCycles = [3, 2, 2, 1];
224}
225
226// AES Instructions.
227def : WriteRes<WriteAESDecEnc, [HWPort5]> {
228 let Latency = 7;
229 let ResourceCycles = [1];
230}
231def : WriteRes<WriteAESDecEncLd, [HWPort5, HWPort23]> {
232 let Latency = 7;
233 let ResourceCycles = [1, 1];
234}
235
236def : WriteRes<WriteAESIMC, [HWPort5]> {
237 let Latency = 14;
238 let ResourceCycles = [2];
239}
240def : WriteRes<WriteAESIMCLd, [HWPort5, HWPort23]> {
241 let Latency = 14;
242 let ResourceCycles = [2, 1];
243}
244
245def : WriteRes<WriteAESKeyGen, [HWPort0, HWPort5]> {
246 let Latency = 10;
247 let ResourceCycles = [2, 8];
248}
249def : WriteRes<WriteAESKeyGenLd, [HWPort0, HWPort5, HWPort23]> {
250 let Latency = 10;
251 let ResourceCycles = [2, 7, 1];
252}
253
254// Carry-less multiplication instructions.
255def : WriteRes<WriteCLMul, [HWPort0, HWPort5]> {
256 let Latency = 7;
257 let ResourceCycles = [2, 1];
258}
259def : WriteRes<WriteCLMulLd, [HWPort0, HWPort5, HWPort23]> {
260 let Latency = 7;
261 let ResourceCycles = [2, 1, 1];
262}
Nadav Roteme7b6a8a2013-03-28 22:34:46 +0000263
264def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
265def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
Quentin Colombetca498512014-02-24 19:33:51 +0000266def : WriteRes<WriteFence, [HWPort23, HWPort4]>;
267def : WriteRes<WriteNop, []>;
Quentin Colombet35d37b72014-08-18 17:55:08 +0000268
269//================ Exceptions ================//
270
271//-- Specific Scheduling Models --//
Quentin Colombet456c9912014-08-18 17:55:29 +0000272def WriteP0 : SchedWriteRes<[HWPort0]>;
273def WriteP1 : SchedWriteRes<[HWPort1]>;
274def WriteP1_P23 : SchedWriteRes<[HWPort1, HWPort23]> {
275 let NumMicroOps = 2;
276}
Quentin Colombetfb887b12014-08-18 17:55:13 +0000277def WriteP1_Lat3 : SchedWriteRes<[HWPort1]> {
278 let Latency = 3;
279}
280def WriteP1_Lat3Ld : SchedWriteRes<[HWPort1, HWPort23]> {
281 let Latency = 7;
282}
283
Quentin Colombet35d37b72014-08-18 17:55:08 +0000284def Write2P0156_Lat2 : SchedWriteRes<[HWPort0156]> {
285 let Latency = 2;
286 let ResourceCycles = [2];
287}
288def Write2P0156_Lat2Ld : SchedWriteRes<[HWPort0156, HWPort23]> {
289 let Latency = 6;
290 let ResourceCycles = [2, 1];
291}
292
Quentin Colombetf68e0942014-08-18 17:55:36 +0000293def Write5P0156 : SchedWriteRes<[HWPort0156]> {
294 let NumMicroOps = 5;
295 let ResourceCycles = [5];
296}
297
Quentin Colombet35d37b72014-08-18 17:55:08 +0000298def Write2P237_P4 : SchedWriteRes<[HWPort237, HWPort4]> {
299 let Latency = 1;
300 let ResourceCycles = [2, 1];
301}
302
Quentin Colombet0bc907e2014-08-18 17:55:26 +0000303def WriteP01 : SchedWriteRes<[HWPort01]>;
304
305def Write2P01 : SchedWriteRes<[HWPort01]> {
306 let NumMicroOps = 2;
307}
Quentin Colombet456c9912014-08-18 17:55:29 +0000308def Write3P01 : SchedWriteRes<[HWPort01]> {
309 let NumMicroOps = 3;
310}
Quentin Colombet0bc907e2014-08-18 17:55:26 +0000311
Quentin Colombetf68e0942014-08-18 17:55:36 +0000312def WriteP015 : SchedWriteRes<[HWPort015]>;
313
314def WriteP01_P5 : SchedWriteRes<[HWPort01, HWPort5]> {
315 let NumMicroOps = 2;
316}
Quentin Colombet35d37b72014-08-18 17:55:08 +0000317def WriteP06 : SchedWriteRes<[HWPort06]>;
318
Quentin Colombetfb887b12014-08-18 17:55:13 +0000319def Write2P06 : SchedWriteRes<[HWPort06]> {
320 let Latency = 1;
321 let NumMicroOps = 2;
322 let ResourceCycles = [2];
323}
324
Quentin Colombet456c9912014-08-18 17:55:29 +0000325def Write2P1 : SchedWriteRes<[HWPort1]> {
326 let NumMicroOps = 2;
327 let ResourceCycles = [2];
328}
329def Write2P1_P23 : SchedWriteRes<[HWPort1, HWPort23]> {
330 let NumMicroOps = 3;
331 let ResourceCycles = [2, 1];
332}
Quentin Colombetfb887b12014-08-18 17:55:13 +0000333def WriteP15 : SchedWriteRes<[HWPort15]>;
334def WriteP15Ld : SchedWriteRes<[HWPort15, HWPort23]> {
335 let Latency = 4;
336}
337
338def Write3P06_Lat2 : SchedWriteRes<[HWPort06]> {
339 let Latency = 2;
340 let NumMicroOps = 3;
341 let ResourceCycles = [3];
342}
343
Quentin Colombetc58fc442014-08-18 17:55:19 +0000344def WriteP0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
345 let NumMicroOps = 2;
346}
347
Quentin Colombetdf260592014-08-18 17:55:11 +0000348def WriteP0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
349 let Latency = 1;
350 let ResourceCycles = [1, 2, 1];
351}
352
353def Write2P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
354 let Latency = 1;
355 let ResourceCycles = [2, 2, 1];
356}
357
Quentin Colombetc58fc442014-08-18 17:55:19 +0000358def Write2P0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
359 let NumMicroOps = 3;
360 let ResourceCycles = [2, 1];
361}
362
Quentin Colombetdf260592014-08-18 17:55:11 +0000363def Write3P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
364 let Latency = 1;
365 let ResourceCycles = [3, 2, 1];
366}
367
Quentin Colombetf68e0942014-08-18 17:55:36 +0000368def WriteP5 : SchedWriteRes<[HWPort5]>;
369def WriteP5Ld : SchedWriteRes<[HWPort5, HWPort23]> {
370 let Latency = 5;
371 let NumMicroOps = 2;
372 let ResourceCycles = [1, 1];
373}
374
Quentin Colombetca74f232014-08-18 17:55:49 +0000375def WriteP0_P1_Lat4 : SchedWriteRes<[HWPort0, HWPort1]> {
376 let Latency = 4;
377 let NumMicroOps = 2;
378 let ResourceCycles = [1, 1];
379}
380
381def WriteP0_P1_Lat4Ld : SchedWriteRes<[HWPort0, HWPort1, HWPort23]> {
382 let Latency = 8;
383 let NumMicroOps = 3;
384 let ResourceCycles = [1, 1, 1];
385}
386
387def WriteP1_P5_Lat4 : SchedWriteRes<[HWPort1, HWPort5]> {
388 let Latency = 4;
389 let NumMicroOps = 2;
390 let ResourceCycles = [1, 1];
391}
392
393def WriteP1_P5_Lat4Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
394 let Latency = 8;
395 let NumMicroOps = 3;
396 let ResourceCycles = [1, 1, 1];
397}
398
399def WriteP1_P5_Lat6 : SchedWriteRes<[HWPort1, HWPort5]> {
400 let Latency = 6;
401 let NumMicroOps = 2;
402 let ResourceCycles = [1, 1];
403}
404
405def WriteP1_P5_Lat6Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
406 let Latency = 10;
407 let NumMicroOps = 3;
408 let ResourceCycles = [1, 1, 1];
409}
410
Quentin Colombet35d37b72014-08-18 17:55:08 +0000411// Notation:
412// - r: register.
413// - mm: 64 bit mmx register.
414// - x = 128 bit xmm register.
415// - (x)mm = mmx or xmm register.
416// - y = 256 bit ymm register.
417// - v = any vector register.
418// - m = memory.
419
420//=== Integer Instructions ===//
421//-- Move instructions --//
422
423// MOV.
424// r16,m.
425def : InstRW<[WriteALULd], (instregex "MOV16rm")>;
426
427// MOVSX, MOVZX.
428// r,m.
429def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
430
431// CMOVcc.
432// r,r.
433def : InstRW<[Write2P0156_Lat2],
434 (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>;
435// r,m.
436def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd],
437 (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>;
438
// XCHG.
// r,r (the second pattern covers the "ar" opcode forms).
def WriteXCHG : SchedWriteRes<[HWPort0156]> {
  let ResourceCycles = [3];
  let Latency = 2;
}
def : InstRW<[WriteXCHG],
             (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
447
448// r,m.
449def WriteXCHGrm : SchedWriteRes<[]> {
450 let Latency = 21;
451 let NumMicroOps = 8;
452}
453def : InstRW<[WriteXCHGrm], (instregex "XCHG(8|16|32|64)rm")>;
454
455// XLAT.
456def WriteXLAT : SchedWriteRes<[]> {
457 let Latency = 7;
458 let NumMicroOps = 3;
459}
460def : InstRW<[WriteXLAT], (instregex "XLAT")>;
461
462// PUSH.
463// m.
464def : InstRW<[Write2P237_P4], (instregex "PUSH(16|32)rmm")>;
465
466// PUSHF.
467def WritePushF : SchedWriteRes<[HWPort1, HWPort4, HWPort237, HWPort06]> {
468 let NumMicroOps = 4;
469}
470def : InstRW<[WritePushF], (instregex "PUSHF(16|32)")>;
471
472// PUSHA.
473def WritePushA : SchedWriteRes<[]> {
474 let NumMicroOps = 19;
475}
476def : InstRW<[WritePushA], (instregex "PUSHA(16|32)")>;
477
478// POP.
479// m.
480def : InstRW<[Write2P237_P4], (instregex "POP(16|32)rmm")>;
481
482// POPF.
483def WritePopF : SchedWriteRes<[]> {
484 let NumMicroOps = 9;
485}
486def : InstRW<[WritePopF], (instregex "POPF(16|32)")>;
487
488// POPA.
489def WritePopA : SchedWriteRes<[]> {
490 let NumMicroOps = 18;
491}
492def : InstRW<[WritePopA], (instregex "POPA(16|32)")>;
493
494// LAHF SAHF.
495def : InstRW<[WriteP06], (instregex "(S|L)AHF")>;
496
497// BSWAP.
498// r32.
499def WriteBSwap32 : SchedWriteRes<[HWPort15]>;
500def : InstRW<[WriteBSwap32], (instregex "BSWAP32r")>;
501
502// r64.
503def WriteBSwap64 : SchedWriteRes<[HWPort06, HWPort15]> {
504 let NumMicroOps = 2;
505}
506def : InstRW<[WriteBSwap64], (instregex "BSWAP64r")>;
507
508// MOVBE.
509// r16,m16 / r64,m64.
510def : InstRW<[Write2P0156_Lat2Ld], (instregex "MOVBE(16|64)rm")>;
511
512// r32, m32.
513def WriteMoveBE32rm : SchedWriteRes<[HWPort15, HWPort23]> {
514 let NumMicroOps = 2;
515}
516def : InstRW<[WriteMoveBE32rm], (instregex "MOVBE32rm")>;
517
518// m16,r16.
519def WriteMoveBE16mr : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
520 let NumMicroOps = 3;
521}
522def : InstRW<[WriteMoveBE16mr], (instregex "MOVBE16mr")>;
523
524// m32,r32.
525def WriteMoveBE32mr : SchedWriteRes<[HWPort15, HWPort237, HWPort4]> {
526 let NumMicroOps = 3;
527}
528def : InstRW<[WriteMoveBE32mr], (instregex "MOVBE32mr")>;
529
530// m64,r64.
531def WriteMoveBE64mr : SchedWriteRes<[HWPort06, HWPort15, HWPort237, HWPort4]> {
532 let NumMicroOps = 4;
533}
534def : InstRW<[WriteMoveBE64mr], (instregex "MOVBE64mr")>;
535
Quentin Colombetdf260592014-08-18 17:55:11 +0000536//-- Arithmetic instructions --//
537
538// ADD SUB.
539// m,r/i.
540def : InstRW<[Write2P0156_2P237_P4],
541 (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
542 "(ADD|SUB)(8|16|32|64)mi8", "(ADD|SUB)64mi32")>;
543
544// ADC SBB.
545// r,r/i.
546def : InstRW<[Write2P0156_Lat2], (instregex "(ADC|SBB)(8|16|32|64)r(r|i)",
547 "(ADC|SBB)(16|32|64)ri8",
548 "(ADC|SBB)64ri32",
549 "(ADC|SBB)(8|16|32|64)rr_REV")>;
550
551// r,m.
552def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd], (instregex "(ADC|SBB)(8|16|32|64)rm")>;
553
554// m,r/i.
555def : InstRW<[Write3P0156_2P237_P4],
556 (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
557 "(ADC|SBB)(16|32|64)mi8",
558 "(ADC|SBB)64mi32")>;
559
// INC DEC NOT NEG.
// m.
// The second pattern covers the 64-bit-mode variants of the 16- and 32-bit
// INC/DEC memory forms. Those opcode names contain an underscore
// (INC64_16m, INC64_32m, DEC64_16m, DEC64_32m); without it the pattern
// matches no opcode and these forms fall back to the default model.
def : InstRW<[WriteP0156_2P237_P4],
             (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m",
                        "(INC|DEC)64_(16|32)m")>;
565
566// MUL IMUL.
567// r16.
568def WriteMul16 : SchedWriteRes<[HWPort1, HWPort0156]> {
569 let Latency = 4;
570 let NumMicroOps = 4;
571}
572def : InstRW<[WriteMul16], (instregex "IMUL16r", "MUL16r")>;
573
574// m16.
575def WriteMul16Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
576 let Latency = 8;
577 let NumMicroOps = 5;
578}
579def : InstRW<[WriteMul16Ld], (instregex "IMUL16m", "MUL16m")>;
580
581// r32.
582def WriteMul32 : SchedWriteRes<[HWPort1, HWPort0156]> {
583 let Latency = 4;
584 let NumMicroOps = 3;
585}
586def : InstRW<[WriteMul32], (instregex "IMUL32r", "MUL32r")>;
587
588// m32.
589def WriteMul32Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
590 let Latency = 8;
591 let NumMicroOps = 4;
592}
593def : InstRW<[WriteMul32Ld], (instregex "IMUL32m", "MUL32m")>;
594
595// r64.
596def WriteMul64 : SchedWriteRes<[HWPort1, HWPort6]> {
597 let Latency = 3;
598 let NumMicroOps = 2;
599}
600def : InstRW<[WriteMul64], (instregex "IMUL64r", "MUL64r")>;
601
602// m64.
603def WriteMul64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
604 let Latency = 7;
605 let NumMicroOps = 3;
606}
607def : InstRW<[WriteMul64Ld], (instregex "IMUL64m", "MUL64m")>;
608
// r16,r16,i.
610def WriteMul16rri : SchedWriteRes<[HWPort1, HWPort0156]> {
611 let Latency = 4;
612 let NumMicroOps = 2;
613}
614def : InstRW<[WriteMul16rri], (instregex "IMUL16rri", "IMUL16rri8")>;
615
// r16,m16,i.
617def WriteMul16rmi : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
618 let Latency = 8;
619 let NumMicroOps = 3;
620}
621def : InstRW<[WriteMul16rmi], (instregex "IMUL16rmi", "IMUL16rmi8")>;
622
623// MULX.
624// r32,r32,r32.
625def WriteMulX32 : SchedWriteRes<[HWPort1, HWPort056]> {
626 let Latency = 4;
627 let NumMicroOps = 3;
628 let ResourceCycles = [1, 2];
629}
630def : InstRW<[WriteMulX32], (instregex "MULX32rr")>;
631
632// r32,r32,m32.
633def WriteMulX32Ld : SchedWriteRes<[HWPort1, HWPort056, HWPort23]> {
634 let Latency = 8;
635 let NumMicroOps = 4;
636 let ResourceCycles = [1, 2, 1];
637}
638def : InstRW<[WriteMulX32Ld], (instregex "MULX32rm")>;
639
640// r64,r64,r64.
641def WriteMulX64 : SchedWriteRes<[HWPort1, HWPort6]> {
642 let Latency = 4;
643 let NumMicroOps = 2;
644}
645def : InstRW<[WriteMulX64], (instregex "MULX64rr")>;
646
647// r64,r64,m64.
648def WriteMulX64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
649 let Latency = 8;
650 let NumMicroOps = 3;
651}
652def : InstRW<[WriteMulX64Ld], (instregex "MULX64rm")>;
653
// DIV.
// Each operand width gets its own write; only the latency and micro-op
// count differ, all four use ports 0, 1, 5 and 6.
// r8.
def WriteDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let NumMicroOps = 9;
  let Latency = 22;
}
def : InstRW<[WriteDiv8], (instregex "DIV8r")>;

// r16.
def WriteDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let NumMicroOps = 10;
  let Latency = 23;
}
def : InstRW<[WriteDiv16], (instregex "DIV16r")>;

// r32.
def WriteDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let NumMicroOps = 10;
  let Latency = 22;
}
def : InstRW<[WriteDiv32], (instregex "DIV32r")>;

// r64.
def WriteDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let NumMicroOps = 36;
  let Latency = 32;
}
def : InstRW<[WriteDiv64], (instregex "DIV64r")>;
682
683// IDIV.
684// r8.
685def WriteIDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
686 let Latency = 23;
687 let NumMicroOps = 9;
688}
689def : InstRW<[WriteIDiv8], (instregex "IDIV8r")>;
690
691// r16.
692def WriteIDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
693 let Latency = 23;
694 let NumMicroOps = 10;
695}
696def : InstRW<[WriteIDiv16], (instregex "IDIV16r")>;
697
698// r32.
699def WriteIDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
700 let Latency = 22;
701 let NumMicroOps = 9;
702}
703def : InstRW<[WriteIDiv32], (instregex "IDIV32r")>;
704
705// r64.
706def WriteIDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
707 let Latency = 39;
708 let NumMicroOps = 59;
709}
710def : InstRW<[WriteIDiv64], (instregex "IDIV64r")>;
711
Quentin Colombetfb887b12014-08-18 17:55:13 +0000712//-- Logic instructions --//
713
714// AND OR XOR.
715// m,r/i.
716def : InstRW<[Write2P0156_2P237_P4],
717 (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
718 "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
719
720// SHR SHL SAR.
721// m,i.
722def WriteShiftRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
723 let NumMicroOps = 4;
724 let ResourceCycles = [2, 1, 1];
725}
726def : InstRW<[WriteShiftRMW], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
727
728// r,cl.
729def : InstRW<[Write3P06_Lat2], (instregex "S(A|H)(R|L)(8|16|32|64)rCL")>;
730
731// m,cl.
732def WriteShiftClLdRMW : SchedWriteRes<[HWPort06, HWPort23, HWPort4]> {
733 let NumMicroOps = 6;
734 let ResourceCycles = [3, 2, 1];
735}
736def : InstRW<[WriteShiftClLdRMW], (instregex "S(A|H)(R|L)(8|16|32|64)mCL")>;
737
738// ROR ROL.
739// r,1.
740def : InstRW<[Write2P06], (instregex "RO(R|L)(8|16|32|64)r1")>;
741
742// m,i.
743def WriteRotateRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
744 let NumMicroOps = 5;
745 let ResourceCycles = [2, 2, 1];
746}
747def : InstRW<[WriteRotateRMW], (instregex "RO(R|L)(8|16|32|64)mi")>;
748
749// r,cl.
750def : InstRW<[Write3P06_Lat2], (instregex "RO(R|L)(8|16|32|64)rCL")>;
751
752// m,cl.
753def WriteRotateRMWCL : SchedWriteRes<[]> {
754 let NumMicroOps = 6;
755}
756def : InstRW<[WriteRotateRMWCL], (instregex "RO(R|L)(8|16|32|64)mCL")>;
757
758// RCR RCL.
759// r,1.
760def WriteRCr1 : SchedWriteRes<[HWPort06, HWPort0156]> {
761 let Latency = 2;
762 let NumMicroOps = 3;
763 let ResourceCycles = [2, 1];
764}
765def : InstRW<[WriteRCr1], (instregex "RC(R|L)(8|16|32|64)r1")>;
766
767// m,1.
768def WriteRCm1 : SchedWriteRes<[]> {
769 let NumMicroOps = 6;
770}
771def : InstRW<[WriteRCm1], (instregex "RC(R|L)(8|16|32|64)m1")>;
772
// r,i/cl.
774def WriteRCri : SchedWriteRes<[HWPort0156]> {
775 let Latency = 6;
776 let NumMicroOps = 8;
777}
778def : InstRW<[WriteRCri], (instregex "RC(R|L)(8|16|32|64)r(i|CL)")>;
779
// m,i/cl.
781def WriteRCmi : SchedWriteRes<[]> {
782 let NumMicroOps = 11;
783}
784def : InstRW<[WriteRCmi], (instregex "RC(R|L)(8|16|32|64)m(i|CL)")>;
785
786// SHRD SHLD.
787// r,r,i.
788def WriteShDrr : SchedWriteRes<[HWPort1]> {
789 let Latency = 3;
790}
791def : InstRW<[WriteShDrr], (instregex "SH(R|L)D(16|32|64)rri8")>;
792
793// m,r,i.
794def WriteShDmr : SchedWriteRes<[]> {
795 let NumMicroOps = 5;
796}
797def : InstRW<[WriteShDmr], (instregex "SH(R|L)D(16|32|64)mri8")>;
798
// SHLD r,r,cl.
def WriteShlDCL : SchedWriteRes<[HWPort0156]> {
  let NumMicroOps = 4;
  let Latency = 3;
}
def : InstRW<[WriteShlDCL], (instregex "SHLD(16|32|64)rrCL")>;

// SHRD r,r,cl. Same resources as SHLD, but one cycle more latency.
def WriteShrDCL : SchedWriteRes<[HWPort0156]> {
  let NumMicroOps = 4;
  let Latency = 4;
}
def : InstRW<[WriteShrDCL], (instregex "SHRD(16|32|64)rrCL")>;
812
813// m,r,cl.
814def WriteShDmrCL : SchedWriteRes<[]> {
815 let NumMicroOps = 7;
816}
817def : InstRW<[WriteShDmrCL], (instregex "SH(R|L)D(16|32|64)mrCL")>;
818
819// BT.
820// r,r/i.
821def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>;
822
823// m,r.
824def WriteBTmr : SchedWriteRes<[]> {
825 let NumMicroOps = 10;
826}
827def : InstRW<[WriteBTmr], (instregex "BT(16|32|64)mr")>;
828
829// m,i.
830def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
831
832// BTR BTS BTC.
// r,r/i.
834def : InstRW<[WriteShift], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;
835
836// m,r.
837def WriteBTRSCmr : SchedWriteRes<[]> {
838 let NumMicroOps = 11;
839}
840def : InstRW<[WriteBTRSCmr], (instregex "BT(R|S|C)(16|32|64)mr")>;
841
842// m,i.
843def : InstRW<[WriteShiftLd], (instregex "BT(R|S|C)(16|32|64)mi8")>;
844
845// BSF BSR.
846// r,r.
847def : InstRW<[WriteP1_Lat3], (instregex "BS(R|F)(16|32|64)rr")>;
848// r,m.
849def : InstRW<[WriteP1_Lat3Ld], (instregex "BS(R|F)(16|32|64)rm")>;
850
851// SETcc.
852// r.
853def : InstRW<[WriteShift],
854 (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>;
855// m.
856def WriteSetCCm : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
857 let NumMicroOps = 3;
858}
859def : InstRW<[WriteSetCCm],
860 (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>;
861
862// CLD STD.
863def WriteCldStd : SchedWriteRes<[HWPort15, HWPort6]> {
864 let NumMicroOps = 3;
865}
866def : InstRW<[WriteCldStd], (instregex "STD", "CLD")>;
867
// LZCNT TZCNT.
// The alternation covers only the "L"/"TZ" prefix, followed by the shared
// "CNT" stem, so that both LZCNT* and TZCNT* opcodes are matched.
// r,r.
def : InstRW<[WriteP1_Lat3], (instregex "(L|TZ)CNT(16|32|64)rr")>;
// r,m.
def : InstRW<[WriteP1_Lat3Ld], (instregex "(L|TZ)CNT(16|32|64)rm")>;
873
874// ANDN.
875// r,r.
876def : InstRW<[WriteP15], (instregex "ANDN(32|64)rr")>;
877// r,m.
878def : InstRW<[WriteP15Ld], (instregex "ANDN(32|64)rm")>;
879
880// BLSI BLSMSK BLSR.
881// r,r.
882def : InstRW<[WriteP15], (instregex "BLS(I|MSK|R)(32|64)rr")>;
883// r,m.
884def : InstRW<[WriteP15Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;
885
886// BEXTR.
887// r,r,r.
888def : InstRW<[Write2P0156_Lat2], (instregex "BEXTR(32|64)rr")>;
889// r,m,r.
890def : InstRW<[Write2P0156_Lat2Ld], (instregex "BEXTR(32|64)rm")>;
891
892// BZHI.
893// r,r,r.
894def : InstRW<[WriteP15], (instregex "BZHI(32|64)rr")>;
895// r,m,r.
896def : InstRW<[WriteP15Ld], (instregex "BZHI(32|64)rm")>;
897
898// PDEP PEXT.
899// r,r,r.
900def : InstRW<[WriteP1_Lat3], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
901// r,m,r.
902def : InstRW<[WriteP1_Lat3Ld], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
903
Quentin Colombete1b17762014-08-18 17:55:16 +0000904//-- Control transfer instructions --//
905
906// J(E|R)CXZ.
907def WriteJCXZ : SchedWriteRes<[HWPort0156, HWPort6]> {
908 let NumMicroOps = 2;
909}
910def : InstRW<[WriteJCXZ], (instregex "JCXZ", "JECXZ_(32|64)", "JRCXZ")>;
911
912// LOOP.
913def WriteLOOP : SchedWriteRes<[]> {
914 let NumMicroOps = 7;
915}
916def : InstRW<[WriteLOOP], (instregex "LOOP")>;
917
918// LOOP(N)E
919def WriteLOOPE : SchedWriteRes<[]> {
920 let NumMicroOps = 11;
921}
922def : InstRW<[WriteLOOPE], (instregex "LOOPE", "LOOPNE")>;
923
924// CALL.
925// r.
926def WriteCALLr : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
927 let NumMicroOps = 3;
928}
929def : InstRW<[WriteCALLr], (instregex "CALL(16|32)r")>;
930
931// m.
932def WriteCALLm : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
933 let NumMicroOps = 4;
934 let ResourceCycles = [2, 1, 1];
935}
936def : InstRW<[WriteCALLm], (instregex "CALL(16|32)m")>;
937
938// RET.
939def WriteRET : SchedWriteRes<[HWPort237, HWPort6]> {
940 let NumMicroOps = 2;
941}
942def : InstRW<[WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)")>;
943
944// i.
945def WriteRETI : SchedWriteRes<[HWPort23, HWPort6, HWPort015]> {
946 let NumMicroOps = 4;
947 let ResourceCycles = [1, 2, 1];
948}
949def : InstRW<[WriteRETI], (instregex "RETI(L|Q|W)", "LRETI(L|Q|W)")>;
950
951// BOUND.
952// r,m.
953def WriteBOUND : SchedWriteRes<[]> {
954 let NumMicroOps = 15;
955}
956def : InstRW<[WriteBOUND], (instregex "BOUNDS(16|32)rm")>;
957
958// INTO.
959def WriteINTO : SchedWriteRes<[]> {
960 let NumMicroOps = 4;
961}
962def : InstRW<[WriteINTO], (instregex "INTO")>;
963
Quentin Colombetc58fc442014-08-18 17:55:19 +0000964//-- String instructions --//
965
966// LODSB/W.
967def : InstRW<[Write2P0156_P23], (instregex "LODS(B|W)")>;
968
969// LODSD/Q.
970def : InstRW<[WriteP0156_P23], (instregex "LODS(L|Q)")>;
971
972// STOS.
973def WriteSTOS : SchedWriteRes<[HWPort23, HWPort0156, HWPort4]> {
974 let NumMicroOps = 3;
975}
976def : InstRW<[WriteSTOS], (instregex "STOS(B|L|Q|W)")>;
977
978// MOVS.
979def WriteMOVS : SchedWriteRes<[HWPort23, HWPort4, HWPort0156]> {
980 let Latency = 4;
981 let NumMicroOps = 5;
982 let ResourceCycles = [2, 1, 2];
983}
984def : InstRW<[WriteMOVS], (instregex "MOVS(B|L|Q|W)")>;
985
986// SCAS.
987def : InstRW<[Write2P0156_P23], (instregex "SCAS(B|W|L|Q)")>;
988
// CMPS.
// Latency 4, 5 micro-ops: 2 cycles on HWPort23 and 3 on HWPort0156.
def WriteCMPS : SchedWriteRes<[HWPort23, HWPort0156]> {
  let ResourceCycles = [2, 3];
  let NumMicroOps = 5;
  let Latency = 4;
}
def : InstRW<[WriteCMPS],
             (instregex "CMPS(B|L|Q|W)")>;
996
//-- Synchronization instructions --//
998
999// XADD.
1000def WriteXADD : SchedWriteRes<[]> {
1001 let NumMicroOps = 5;
1002}
1003def : InstRW<[WriteXADD], (instregex "XADD(8|16|32|64)rm")>;
1004
1005// CMPXCHG.
1006def WriteCMPXCHG : SchedWriteRes<[]> {
1007 let NumMicroOps = 6;
1008}
1009def : InstRW<[WriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;
1010
1011// CMPXCHG8B.
1012def WriteCMPXCHG8B : SchedWriteRes<[]> {
1013 let NumMicroOps = 15;
1014}
1015def : InstRW<[WriteCMPXCHG8B], (instregex "CMPXCHG8B")>;
1016
1017// CMPXCHG16B.
1018def WriteCMPXCHG16B : SchedWriteRes<[]> {
1019 let NumMicroOps = 22;
1020}
1021def : InstRW<[WriteCMPXCHG16B], (instregex "CMPXCHG16B")>;
1022
//-- Other --//
1024
// PAUSE.
// 5 micro-ops; HWPort05 is held for 1 cycle and HWPort6 for 3 cycles.
def WritePAUSE : SchedWriteRes<[HWPort05, HWPort6]> {
  let ResourceCycles = [1, 3];
  let NumMicroOps = 5;
}
def : InstRW<[WritePAUSE],
             (instregex "PAUSE")>;
1031
1032// LEAVE.
1033def : InstRW<[Write2P0156_P23], (instregex "LEAVE")>;
1034
1035// XGETBV.
1036def WriteXGETBV : SchedWriteRes<[]> {
1037 let NumMicroOps = 8;
1038}
1039def : InstRW<[WriteXGETBV], (instregex "XGETBV")>;
1040
1041// RDTSC.
1042def WriteRDTSC : SchedWriteRes<[]> {
1043 let NumMicroOps = 15;
1044}
1045def : InstRW<[WriteRDTSC], (instregex "RDTSC")>;
1046
1047// RDPMC.
1048def WriteRDPMC : SchedWriteRes<[]> {
1049 let NumMicroOps = 34;
1050}
1051def : InstRW<[WriteRDPMC], (instregex "RDPMC")>;
1052
// RDRAND.
// 17 micro-ops: 1 cycle on HWPort23 and 16 cycles on HWPort015.
def WriteRDRAND : SchedWriteRes<[HWPort23, HWPort015]> {
  let ResourceCycles = [1, 16];
  let NumMicroOps = 17;
}
def : InstRW<[WriteRDRAND],
             (instregex "RDRAND(16|32|64)r")>;
1059
//=== Floating Point x87 Instructions ===//
1061//-- Move instructions --//
1062
// FLD.
// r.
def : InstRW<[WriteP01],
             (instregex "LD_Frr")>;

// m80.
// Latency 4, 4 micro-ops: 2 cycles on each of HWPort01 and HWPort23.
def WriteLD_F80m : SchedWriteRes<[HWPort01, HWPort23]> {
  let ResourceCycles = [2, 2];
  let NumMicroOps = 4;
  let Latency = 4;
}
def : InstRW<[WriteLD_F80m],
             (instregex "LD_F80m")>;
1073
1074// FBLD.
1075// m80.
1076def WriteFBLD : SchedWriteRes<[]> {
1077 let Latency = 47;
1078 let NumMicroOps = 43;
1079}
1080def : InstRW<[WriteFBLD], (instregex "FBLDm")>;
1081
1082// FST(P).
1083// r.
1084def : InstRW<[WriteP01], (instregex "ST_(F|FP)rr")>;
1085
1086// m80.
1087def WriteST_FP80m : SchedWriteRes<[HWPort0156, HWPort23, HWPort4]> {
1088 let NumMicroOps = 7;
1089 let ResourceCycles = [3, 2, 2];
1090}
1091def : InstRW<[WriteST_FP80m], (instregex "ST_FP80m")>;
1092
1093// FBSTP.
1094// m80.
1095def WriteFBSTP : SchedWriteRes<[]> {
1096 let NumMicroOps = 226;
1097}
1098def : InstRW<[WriteFBSTP], (instregex "FBSTPm")>;
1099
1100// FXCHG.
1101def : InstRW<[WriteNop], (instregex "XCH_F")>;
1102
// FILD.
// Latency 6, 2 micro-ops on HWPort01 and HWPort23.
let Latency = 6, NumMicroOps = 2 in
def WriteFILD : SchedWriteRes<[HWPort01, HWPort23]>;
def : InstRW<[WriteFILD],
             (instregex "ILD_F(16|32|64)m")>;
1109
1110// FIST(P) FISTTP.
1111def WriteFIST : SchedWriteRes<[HWPort1, HWPort23, HWPort4]> {
1112 let Latency = 7;
1113 let NumMicroOps = 3;
1114}
1115def : InstRW<[WriteFIST], (instregex "IST_(F|FP)(16|32)m")>;
1116
1117// FLDZ.
1118def : InstRW<[WriteP01], (instregex "LD_F0")>;
1119
1120// FLD1.
1121def : InstRW<[Write2P01], (instregex "LD_F1")>;
1122
// FLDPI FLDL2E etc.
// Each pattern must be its own comma-separated operand: TableGen concatenates
// adjacent string literals, so a missing comma silently merges two regexes
// into one that matches no instruction.
def : InstRW<[Write2P01], (instregex "FLDPI", "FLDL2(T|E)", "FLDL(G|N)2")>;
1125
// FCMOVcc.
// Latency 2, 3 micro-ops: 2 cycles on HWPort0 and 1 cycle on HWPort5.
def WriteFCMOVcc : SchedWriteRes<[HWPort0, HWPort5]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
// The alternation lists every x87 condition suffix, including plain "E"
// (CMOVE_F), which is not covered by the BE/NE alternatives.
def : InstRW<[WriteFCMOVcc], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F")>;
1133
1134// FNSTSW.
1135// AX.
1136def WriteFNSTSW : SchedWriteRes<[HWPort0, HWPort0156]> {
1137 let NumMicroOps = 2;
1138}
1139def : InstRW<[WriteFNSTSW], (instregex "FNSTSW16r")>;
1140
1141// m16.
1142def WriteFNSTSWm : SchedWriteRes<[HWPort0, HWPort4, HWPort237]> {
1143 let Latency = 6;
1144 let NumMicroOps = 3;
1145}
1146def : InstRW<[WriteFNSTSWm], (instregex "FNSTSWm")>;
1147
// FLDCW.
// Latency 7, 3 micro-ops on HWPort01, HWPort23 and HWPort6.
let Latency = 7, NumMicroOps = 3 in
def WriteFLDCW : SchedWriteRes<[HWPort01, HWPort23, HWPort6]>;
def : InstRW<[WriteFLDCW],
             (instregex "FLDCW16m")>;
1154
// FNSTCW.
// 3 micro-ops on HWPort237, HWPort4 and HWPort6.
let NumMicroOps = 3 in
def WriteFNSTCW : SchedWriteRes<[HWPort237, HWPort4, HWPort6]>;
def : InstRW<[WriteFNSTCW],
             (instregex "FNSTCW16m")>;
1160
1161// FINCSTP FDECSTP.
1162def : InstRW<[WriteP01], (instregex "FINCSTP", "FDECSTP")>;
1163
1164// FFREE.
1165def : InstRW<[WriteP01], (instregex "FFREE")>;
1166
1167// FNSAVE.
1168def WriteFNSAVE : SchedWriteRes<[]> {
1169 let NumMicroOps = 147;
1170}
1171def : InstRW<[WriteFNSAVE], (instregex "FSAVEm")>;
1172
1173// FRSTOR.
1174def WriteFRSTOR : SchedWriteRes<[]> {
1175 let NumMicroOps = 90;
1176}
1177def : InstRW<[WriteFRSTOR], (instregex "FRSTORm")>;
1178
//-- Arithmetic instructions --//
1180
1181// FABS.
1182def : InstRW<[WriteP0], (instregex "ABS_F")>;
1183
1184// FCHS.
1185def : InstRW<[WriteP0], (instregex "CHS_F")>;
1186
1187// FCOM(P) FUCOM(P).
1188// r.
1189def : InstRW<[WriteP1], (instregex "COM_FST0r", "COMP_FST0r", "UCOM_Fr",
1190 "UCOM_FPr")>;
1191// m.
1192def : InstRW<[WriteP1_P23], (instregex "FCOM(32|64)m", "FCOMP(32|64)m")>;
1193
1194// FCOMPP FUCOMPP.
1195// r.
1196def : InstRW<[Write2P01], (instregex "FCOMPP", "UCOM_FPPr")>;
1197
1198// FCOMI(P) FUCOMI(P).
// r.
1200def : InstRW<[Write3P01], (instregex "COM_FIr", "COM_FIPr", "UCOM_FIr",
1201 "UCOM_FIPr")>;
1202
1203// FICOM(P).
1204def : InstRW<[Write2P1_P23], (instregex "FICOM(16|32)m", "FICOMP(16|32)m")>;
1205
1206// FTST.
1207def : InstRW<[WriteP1], (instregex "TST_F")>;
1208
1209// FXAM.
1210def : InstRW<[Write2P1], (instregex "FXAM")>;
1211
1212// FPREM.
1213def WriteFPREM : SchedWriteRes<[]> {
1214 let Latency = 19;
1215 let NumMicroOps = 28;
1216}
1217def : InstRW<[WriteFPREM], (instregex "FPREM")>;
1218
1219// FPREM1.
1220def WriteFPREM1 : SchedWriteRes<[]> {
1221 let Latency = 27;
1222 let NumMicroOps = 41;
1223}
1224def : InstRW<[WriteFPREM1], (instregex "FPREM1")>;
1225
1226// FRNDINT.
1227def WriteFRNDINT : SchedWriteRes<[]> {
1228 let Latency = 11;
1229 let NumMicroOps = 17;
1230}
1231def : InstRW<[WriteFRNDINT], (instregex "FRNDINT")>;
1232
//-- Math instructions --//
1234
1235// FSCALE.
1236def WriteFSCALE : SchedWriteRes<[]> {
1237 let Latency = 75; // 49-125
1238 let NumMicroOps = 50; // 25-75
1239}
1240def : InstRW<[WriteFSCALE], (instregex "FSCALE")>;
1241
1242// FXTRACT.
1243def WriteFXTRACT : SchedWriteRes<[]> {
1244 let Latency = 15;
1245 let NumMicroOps = 17;
1246}
1247def : InstRW<[WriteFXTRACT], (instregex "FXTRACT")>;
1248
//-- Other instructions --//
1250
1251// FNOP.
1252def : InstRW<[WriteP01], (instregex "FNOP")>;
1253
1254// WAIT.
1255def : InstRW<[Write2P01], (instregex "WAIT")>;
1256
1257// FNCLEX.
1258def : InstRW<[Write5P0156], (instregex "FNCLEX")>;
1259
1260// FNINIT.
1261def WriteFNINIT : SchedWriteRes<[]> {
1262 let NumMicroOps = 26;
1263}
1264def : InstRW<[WriteFNINIT], (instregex "FNINIT")>;
1265
1266//=== Integer MMX and XMM Instructions ===//
1267//-- Move instructions --//
1268
1269// MOVD.
1270// r32/64 <- (x)mm.
1271def : InstRW<[WriteP0], (instregex "MMX_MOVD64grr", "MMX_MOVD64from64rr",
1272 "VMOVPDI2DIrr", "MOVPDI2DIrr")>;
1273
1274// (x)mm <- r32/64.
1275def : InstRW<[WriteP5], (instregex "MMX_MOVD64rr", "MMX_MOVD64to64rr",
1276 "VMOVDI2PDIrr", "MOVDI2PDIrr")>;
1277
1278// MOVQ.
1279// r64 <- (x)mm.
1280def : InstRW<[WriteP0], (instregex "VMOVPQIto64rr")>;
1281
1282// (x)mm <- r64.
1283def : InstRW<[WriteP5], (instregex "VMOV64toPQIrr", "VMOVZQI2PQIrr")>;
1284
1285// (x)mm <- (x)mm.
1286def : InstRW<[WriteP015], (instregex "MMX_MOVQ64rr")>;
1287
1288// (V)MOVDQA/U.
1289// x <- x.
1290def : InstRW<[WriteP015], (instregex "MOVDQ(A|U)rr", "VMOVDQ(A|U)rr",
1291 "MOVDQ(A|U)rr_REV", "VMOVDQ(A|U)rr_REV",
1292 "VMOVDQ(A|U)Yrr", "VMOVDQ(A|U)Yrr_REV")>;
1293
1294// MOVDQ2Q.
1295def : InstRW<[WriteP01_P5], (instregex "MMX_MOVDQ2Qrr")>;
1296
1297// MOVQ2DQ.
1298def : InstRW<[WriteP015], (instregex "MMX_MOVQ2DQrr")>;
1299
1300
// PACKSSWB/DW.
// mm <- mm.
// Latency 2, 3 micro-ops, all on HWPort5 (3 cycles).
def WriteMMXPACKSSrr : SchedWriteRes<[HWPort5]> {
  let ResourceCycles = [3];
  let NumMicroOps = 3;
  let Latency = 2;
}
def : InstRW<[WriteMMXPACKSSrr],
             (instregex "MMX_PACKSSDWirr",
                        "MMX_PACKSSWBirr",
                        "MMX_PACKUSWBirr")>;
1310
// mm <- m64.
// Latency 4, 3 micro-ops: 1 cycle on HWPort23 and 3 cycles on HWPort5.
def WriteMMXPACKSSrm : SchedWriteRes<[HWPort23, HWPort5]> {
  let ResourceCycles = [1, 3];
  let NumMicroOps = 3;
  let Latency = 4;
}
def : InstRW<[WriteMMXPACKSSrm],
             (instregex "MMX_PACKSSDWirm",
                        "MMX_PACKSSWBirm",
                        "MMX_PACKUSWBirm")>;
1319
1320// VPMOVSX/ZX BW BD BQ DW DQ.
1321// y <- x.
1322def WriteVPMOVSX : SchedWriteRes<[HWPort5]> {
1323 let Latency = 3;
1324 let NumMicroOps = 1;
1325}
1326def : InstRW<[WriteVPMOVSX], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>;
1327
1328// PBLENDW.
1329// x,x,i / v,v,v,i
1330def WritePBLENDWr : SchedWriteRes<[HWPort5]>;
1331def : InstRW<[WritePBLENDWr], (instregex "(V?)PBLENDW(Y?)rri")>;
1332
1333// x,m,i / v,v,m,i
1334def WritePBLENDWm : SchedWriteRes<[HWPort5, HWPort23]> {
1335 let NumMicroOps = 2;
1336 let Latency = 4;
1337 let ResourceCycles = [1, 1];
1338}
1339def : InstRW<[WritePBLENDWm, ReadAfterLd], (instregex "(V?)PBLENDW(Y?)rmi")>;
1340
1341// VPBLENDD.
1342// v,v,v,i.
1343def WriteVPBLENDDr : SchedWriteRes<[HWPort015]>;
1344def : InstRW<[WriteVPBLENDDr], (instregex "VPBLENDD(Y?)rri")>;
1345
1346// v,v,m,i
1347def WriteVPBLENDDm : SchedWriteRes<[HWPort015, HWPort23]> {
1348 let NumMicroOps = 2;
1349 let Latency = 4;
1350 let ResourceCycles = [1, 1];
1351}
1352def : InstRW<[WriteVPBLENDDm, ReadAfterLd], (instregex "VPBLENDD(Y?)rmi")>;
1353
// MASKMOVQ.
// Latency 13, 4 micro-ops: 1 cycle each on HWPort0 and HWPort4, 2 cycles on
// HWPort23.
def WriteMASKMOVQ : SchedWriteRes<[HWPort0, HWPort4, HWPort23]> {
  let ResourceCycles = [1, 1, 2];
  let NumMicroOps = 4;
  let Latency = 13;
}
def : InstRW<[WriteMASKMOVQ],
             (instregex "MMX_MASKMOVQ(64)?")>;
1361
// MASKMOVDQU.
// Latency 14, 10 micro-ops: 4 cycles on HWPort04, 2 on HWPort56 and 4 on
// HWPort23.
def WriteMASKMOVDQU : SchedWriteRes<[HWPort04, HWPort56, HWPort23]> {
  let ResourceCycles = [4, 2, 4];
  let NumMicroOps = 10;
  let Latency = 14;
}
def : InstRW<[WriteMASKMOVDQU],
             (instregex "(V?)MASKMOVDQU(64)?")>;
1369
1370// VPMASKMOV D/Q.
1371// v,v,m.
1372def WriteVPMASKMOVr : SchedWriteRes<[HWPort5, HWPort23]> {
1373 let Latency = 4;
1374 let NumMicroOps = 3;
1375 let ResourceCycles = [2, 1];
1376}
1377def : InstRW<[WriteVPMASKMOVr, ReadAfterLd],
1378 (instregex "VPMASKMOV(D|Q)(Y?)rm")>;
1379
1380// m, v,v.
1381def WriteVPMASKMOVm : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
1382 let Latency = 13;
1383 let NumMicroOps = 4;
1384 let ResourceCycles = [1, 1, 1, 1];
1385}
1386def : InstRW<[WriteVPMASKMOVm], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
1387
1388// PMOVMSKB.
1389def WritePMOVMSKB : SchedWriteRes<[HWPort0]> {
1390 let Latency = 3;
1391}
1392def : InstRW<[WritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKB(Y?)rr")>;
1393
1394// PEXTR B/W/D/Q.
1395// r32,x,i.
1396def WritePEXTRr : SchedWriteRes<[HWPort0, HWPort5]> {
1397 let Latency = 2;
1398 let NumMicroOps = 2;
1399 let ResourceCycles = [1, 1];
1400}
1401def : InstRW<[WritePEXTRr], (instregex "PEXTR(B|W|D|Q)rr", "MMX_PEXTRWirri")>;
1402
1403// m8,x,i.
1404def WritePEXTRm : SchedWriteRes<[HWPort23, HWPort4, HWPort5]> {
1405 let NumMicroOps = 3;
1406 let ResourceCycles = [1, 1, 1];
1407}
1408def : InstRW<[WritePEXTRm], (instregex "PEXTR(B|W|D|Q)mr")>;
1409
1410// VPBROADCAST B/W.
1411// x, m8/16.
1412def WriteVPBROADCAST128Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> {
1413 let Latency = 5;
1414 let NumMicroOps = 3;
1415 let ResourceCycles = [1, 1, 1];
1416}
1417def : InstRW<[WriteVPBROADCAST128Ld, ReadAfterLd],
1418 (instregex "VPBROADCAST(B|W)rm")>;
1419
1420// y, m8/16
1421def WriteVPBROADCAST256Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> {
1422 let Latency = 7;
1423 let NumMicroOps = 3;
1424 let ResourceCycles = [1, 1, 1];
1425}
1426def : InstRW<[WriteVPBROADCAST256Ld, ReadAfterLd],
1427 (instregex "VPBROADCAST(B|W)Yrm")>;
1428
1429// VPGATHERDD.
1430// x.
1431def WriteVPGATHERDD128 : SchedWriteRes<[]> {
1432 let NumMicroOps = 20;
1433}
1434def : InstRW<[WriteVPGATHERDD128, ReadAfterLd], (instregex "VPGATHERDDrm")>;
1435
1436// y.
1437def WriteVPGATHERDD256 : SchedWriteRes<[]> {
1438 let NumMicroOps = 34;
1439}
1440def : InstRW<[WriteVPGATHERDD256, ReadAfterLd], (instregex "VPGATHERDDYrm")>;
1441
1442// VPGATHERQD.
1443// x.
1444def WriteVPGATHERQD128 : SchedWriteRes<[]> {
1445 let NumMicroOps = 15;
1446}
1447def : InstRW<[WriteVPGATHERQD128, ReadAfterLd], (instregex "VPGATHERQDrm")>;
1448
1449// y.
1450def WriteVPGATHERQD256 : SchedWriteRes<[]> {
1451 let NumMicroOps = 22;
1452}
1453def : InstRW<[WriteVPGATHERQD256, ReadAfterLd], (instregex "VPGATHERQDYrm")>;
1454
1455// VPGATHERDQ.
1456// x.
1457def WriteVPGATHERDQ128 : SchedWriteRes<[]> {
1458 let NumMicroOps = 12;
1459}
1460def : InstRW<[WriteVPGATHERDQ128, ReadAfterLd], (instregex "VPGATHERDQrm")>;
1461
1462// y.
1463def WriteVPGATHERDQ256 : SchedWriteRes<[]> {
1464 let NumMicroOps = 20;
1465}
1466def : InstRW<[WriteVPGATHERDQ256, ReadAfterLd], (instregex "VPGATHERDQYrm")>;
1467
1468// VPGATHERQQ.
1469// x.
1470def WriteVPGATHERQQ128 : SchedWriteRes<[]> {
1471 let NumMicroOps = 14;
1472}
1473def : InstRW<[WriteVPGATHERQQ128, ReadAfterLd], (instregex "VPGATHERQQrm")>;
1474
1475// y.
1476def WriteVPGATHERQQ256 : SchedWriteRes<[]> {
1477 let NumMicroOps = 22;
1478}
1479def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>;
1480
//-- Arithmetic instructions --//
1482
1483// PHADD|PHSUB (S) W/D.
1484// v <- v,v.
1485def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> {
1486 let Latency = 3;
1487 let NumMicroOps = 3;
1488 let ResourceCycles = [1, 2];
1489}
1490def : InstRW<[WritePHADDSUBr], (instregex "MMX_PHADD(W?)rr64",
1491 "MMX_PHADDSWrr64",
1492 "MMX_PHSUB(W|D)rr64",
1493 "MMX_PHSUBSWrr64",
1494 "(V?)PH(ADD|SUB)(W|D)(Y?)rr",
1495 "(V?)PH(ADD|SUB)SWrr(256)?")>;
1496
// v <- v,m.
// Load form of WritePHADDSUBr: the same HWPort1 / 2x HWPort5 work plus one
// load on HWPort23. That is one micro-op more than the 3-uop register form,
// so NumMicroOps is 4 and equals the sum of ResourceCycles (the same
// relationship WriteHADDSUBPm has to WriteHADDSUBPr).
def WritePHADDSUBm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
  let Latency = 6;
  let NumMicroOps = 4;
  let ResourceCycles = [1, 2, 1];
}
def : InstRW<[WritePHADDSUBm, ReadAfterLd],
              (instregex "MMX_PHADD(W?)rm64",
                         "MMX_PHADDSWrm64",
                         "MMX_PHSUB(W|D)rm64",
                         "MMX_PHSUBSWrm64",
                         "(V?)PH(ADD|SUB)(W|D)(Y?)rm",
                         "(V?)PH(ADD|SUB)SWrm(128|256)?")>;
1510
1511// PCMPGTQ.
1512// v <- v,v.
1513def WritePCMPGTQr : SchedWriteRes<[HWPort0]> {
1514 let Latency = 5;
1515 let NumMicroOps = 1;
1516}
1517def : InstRW<[WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
1518
1519// v <- v,m.
1520def WritePCMPGTQm : SchedWriteRes<[HWPort0, HWPort23]> {
1521 let Latency = 5;
1522 let NumMicroOps = 2;
1523 let ResourceCycles = [1, 1];
1524}
1525def : InstRW<[WritePCMPGTQm, ReadAfterLd], (instregex "(V?)PCMPGTQ(Y?)rm")>;
1526
1527// PMULLD.
1528// x,x / y,y,y.
1529def WritePMULLDr : SchedWriteRes<[HWPort0]> {
1530 let Latency = 10;
1531 let NumMicroOps = 2;
1532 let ResourceCycles = [2];
1533}
1534def : InstRW<[WritePMULLDr], (instregex "(V?)PMULLD(Y?)rr")>;
1535
1536// x,m / y,y,m.
1537def WritePMULLDm : SchedWriteRes<[HWPort0, HWPort23]> {
1538 let Latency = 10;
1539 let NumMicroOps = 3;
1540 let ResourceCycles = [2, 1];
1541}
1542def : InstRW<[WritePMULLDm, ReadAfterLd], (instregex "(V?)PMULLD(Y?)rm")>;
1543
//-- Logic instructions --//
1545
// PTEST.
// v,v.
// Latency 2, 2 micro-ops: 1 cycle on each of HWPort0 and HWPort5.
def WritePTESTr : SchedWriteRes<[HWPort0, HWPort5]> {
  let ResourceCycles = [1, 1];
  let NumMicroOps = 2;
  let Latency = 2;
}
def : InstRW<[WritePTESTr],
             (instregex "(V?)PTEST(Y?)rr")>;
1554
// v,m.
// Latency 6, 3 micro-ops: 1 cycle on each of HWPort0, HWPort5 and HWPort23.
def WritePTESTm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> {
  let Latency = 6;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
}
// The memory forms must use the load-aware write defined just above; mapping
// them to WritePTESTr would drop the HWPort23 load and the extra latency.
def : InstRW<[WritePTESTm], (instregex "(V?)PTEST(Y?)rm")>;
1562
1563// PSLL,PSRL,PSRA W/D/Q.
1564// x,x / v,v,x.
1565def WritePShift : SchedWriteRes<[HWPort0, HWPort5]> {
1566 let Latency = 2;
1567 let NumMicroOps = 2;
1568 let ResourceCycles = [1, 1];
1569}
1570def : InstRW<[WritePShift], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)(Y?)rr")>;
1571
1572// PSLL,PSRL DQ.
1573def : InstRW<[WriteP5], (instregex "(V?)PS(R|L)LDQ(Y?)ri")>;
1574
//-- Other --//
1576
1577// EMMS.
1578def WriteEMMS : SchedWriteRes<[]> {
1579 let Latency = 13;
1580 let NumMicroOps = 31;
1581}
1582def : InstRW<[WriteEMMS], (instregex "MMX_EMMS")>;
1583
//=== Floating Point XMM and YMM Instructions ===//
1585//-- Move instructions --//
1586
// MOVMSKP S/D.
// r32 <- x.
// Latency 3 on HWPort0.
let Latency = 3 in
def WriteMOVMSKPr : SchedWriteRes<[HWPort0]>;
def : InstRW<[WriteMOVMSKPr],
             (instregex "(V?)MOVMSKP(S|D)rr")>;
1593
// r32 <- y.
// Latency 2 on HWPort0.
let Latency = 2 in
def WriteVMOVMSKPYr : SchedWriteRes<[HWPort0]>;
def : InstRW<[WriteVMOVMSKPYr],
             (instregex "VMOVMSKP(S|D)Yrr")>;
1599
1600// VPERM2F128.
1601def : InstRW<[WriteFShuffle256], (instregex "VPERM2F128rr")>;
1602def : InstRW<[WriteFShuffle256Ld, ReadAfterLd], (instregex "VPERM2F128rm")>;
1603
1604// BLENDVP S/D.
1605def : InstRW<[WriteFVarBlend], (instregex "BLENDVP(S|D)rr0")>;
1606def : InstRW<[WriteFVarBlendLd, ReadAfterLd], (instregex "BLENDVP(S|D)rm0")>;
1607
1608// VBROADCASTF128.
1609def : InstRW<[WriteLoad], (instregex "VBROADCASTF128")>;
1610
1611// EXTRACTPS.
1612// r32,x,i.
1613def WriteEXTRACTPSr : SchedWriteRes<[HWPort0, HWPort5]> {
1614 let NumMicroOps = 2;
1615 let ResourceCycles = [1, 1];
1616}
1617def : InstRW<[WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
1618
1619// m32,x,i.
1620def WriteEXTRACTPSm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> {
1621 let Latency = 4;
1622 let NumMicroOps = 3;
1623 let ResourceCycles = [1, 1, 1];
1624}
1625def : InstRW<[WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
1626
1627// VEXTRACTF128.
1628// x,y,i.
1629def : InstRW<[WriteFShuffle256], (instregex "VEXTRACTF128rr")>;
1630
1631// m128,y,i.
1632def WriteVEXTRACTF128m : SchedWriteRes<[HWPort23, HWPort4]> {
1633 let Latency = 4;
1634 let NumMicroOps = 2;
1635 let ResourceCycles = [1, 1];
1636}
1637def : InstRW<[WriteVEXTRACTF128m], (instregex "VEXTRACTF128mr")>;
1638
1639// VINSERTF128.
1640// y,y,x,i.
1641def : InstRW<[WriteFShuffle256], (instregex "VINSERTF128rr")>;
1642
// y,y,m128,i.
// Latency 4, 2 micro-ops: 1 cycle on each of HWPort015 and HWPort23.
def WriteVINSERTF128m : SchedWriteRes<[HWPort015, HWPort23]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
// Bind the memory form to the dedicated write above rather than the
// register-shuffle write, which has no load resource.
def : InstRW<[WriteVINSERTF128m, ReadAfterLd], (instregex "VINSERTF128rm")>;
1650
1651// VMASKMOVP S/D.
1652// v,v,m.
1653def WriteVMASKMOVPrm : SchedWriteRes<[HWPort5, HWPort23]> {
1654 let Latency = 4;
1655 let NumMicroOps = 3;
1656 let ResourceCycles = [2, 1];
1657}
1658def : InstRW<[WriteVMASKMOVPrm], (instregex "VMASKMOVP(S|D)(Y?)rm")>;
1659
1660// m128,x,x.
1661def WriteVMASKMOVPmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
1662 let Latency = 13;
1663 let NumMicroOps = 4;
1664 let ResourceCycles = [1, 1, 1, 1];
1665}
1666def : InstRW<[WriteVMASKMOVPmr], (instregex "VMASKMOVP(S|D)mr")>;
1667
1668// m256,y,y.
1669def WriteVMASKMOVPYmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
1670 let Latency = 14;
1671 let NumMicroOps = 4;
1672 let ResourceCycles = [1, 1, 1, 1];
1673}
1674def : InstRW<[WriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>;
1675
1676// VGATHERDPS.
1677// x.
1678def WriteVGATHERDPS128 : SchedWriteRes<[]> {
1679 let NumMicroOps = 20;
1680}
1681def : InstRW<[WriteVGATHERDPS128, ReadAfterLd], (instregex "VGATHERDPSrm")>;
1682
1683// y.
1684def WriteVGATHERDPS256 : SchedWriteRes<[]> {
1685 let NumMicroOps = 34;
1686}
1687def : InstRW<[WriteVGATHERDPS256, ReadAfterLd], (instregex "VGATHERDPSYrm")>;
1688
1689// VGATHERQPS.
1690// x.
1691def WriteVGATHERQPS128 : SchedWriteRes<[]> {
1692 let NumMicroOps = 15;
1693}
1694def : InstRW<[WriteVGATHERQPS128, ReadAfterLd], (instregex "VGATHERQPSrm")>;
1695
1696// y.
1697def WriteVGATHERQPS256 : SchedWriteRes<[]> {
1698 let NumMicroOps = 22;
1699}
1700def : InstRW<[WriteVGATHERQPS256, ReadAfterLd], (instregex "VGATHERQPSYrm")>;
1701
1702// VGATHERDPD.
1703// x.
1704def WriteVGATHERDPD128 : SchedWriteRes<[]> {
1705 let NumMicroOps = 12;
1706}
1707def : InstRW<[WriteVGATHERDPD128, ReadAfterLd], (instregex "VGATHERDPDrm")>;
1708
1709// y.
1710def WriteVGATHERDPD256 : SchedWriteRes<[]> {
1711 let NumMicroOps = 20;
1712}
1713def : InstRW<[WriteVGATHERDPD256, ReadAfterLd], (instregex "VGATHERDPDYrm")>;
1714
1715// VGATHERQPD.
1716// x.
1717def WriteVGATHERQPD128 : SchedWriteRes<[]> {
1718 let NumMicroOps = 14;
1719}
1720def : InstRW<[WriteVGATHERQPD128, ReadAfterLd], (instregex "VGATHERQPDrm")>;
1721
1722// y.
1723def WriteVGATHERQPD256 : SchedWriteRes<[]> {
1724 let NumMicroOps = 22;
1725}
1726def : InstRW<[WriteVGATHERQPD256, ReadAfterLd], (instregex "VGATHERQPDYrm")>;
1727
//-- Conversion instructions --//
1729
1730// CVTPD2PS.
1731// x,x.
1732def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVTPD2PSrr")>;
1733
1734// x,m128.
1735def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVTPD2PS(X?)rm")>;
1736
1737// x,y.
1738def WriteCVTPD2PSYrr : SchedWriteRes<[HWPort1, HWPort5]> {
1739 let Latency = 5;
1740 let NumMicroOps = 2;
1741 let ResourceCycles = [1, 1];
1742}
1743def : InstRW<[WriteCVTPD2PSYrr], (instregex "(V?)CVTPD2PSYrr")>;
1744
1745// x,m256.
1746def WriteCVTPD2PSYrm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
1747 let Latency = 9;
1748 let NumMicroOps = 3;
1749 let ResourceCycles = [1, 1, 1];
1750}
1751def : InstRW<[WriteCVTPD2PSYrm], (instregex "(V?)CVTPD2PSYrm")>;
1752
1753// CVTSD2SS.
1754// x,x.
1755def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V)?CVTSD2SSrr")>;
1756
1757// x,m64.
1758def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(Int_)?(V)?CVTSD2SSrm")>;
1759
1760// CVTPS2PD.
1761// x,x.
1762def WriteCVTPS2PDrr : SchedWriteRes<[HWPort0, HWPort5]> {
1763 let Latency = 2;
1764 let NumMicroOps = 2;
1765 let ResourceCycles = [1, 1];
1766}
1767def : InstRW<[WriteCVTPS2PDrr], (instregex "(V?)CVTPS2PDrr")>;
1768
1769// x,m64.
1770// y,m128.
1771def WriteCVTPS2PDrm : SchedWriteRes<[HWPort0, HWPort23]> {
1772 let Latency = 5;
1773 let NumMicroOps = 2;
1774 let ResourceCycles = [1, 1];
1775}
1776def : InstRW<[WriteCVTPS2PDrm], (instregex "(V?)CVTPS2PD(Y?)rm")>;
1777
1778// y,x.
1779def WriteVCVTPS2PDYrr : SchedWriteRes<[HWPort0, HWPort5]> {
1780 let Latency = 5;
1781 let NumMicroOps = 2;
1782 let ResourceCycles = [1, 1];
1783}
1784def : InstRW<[WriteVCVTPS2PDYrr], (instregex "VCVTPS2PDYrr")>;
1785
1786// CVTSS2SD.
1787// x,x.
1788def WriteCVTSS2SDrr : SchedWriteRes<[HWPort0, HWPort5]> {
1789 let Latency = 2;
1790 let NumMicroOps = 2;
1791 let ResourceCycles = [1, 1];
1792}
1793def : InstRW<[WriteCVTSS2SDrr], (instregex "(Int_)?(V?)CVTSS2SDrr")>;
1794
1795// x,m32.
1796def WriteCVTSS2SDrm : SchedWriteRes<[HWPort0, HWPort23]> {
1797 let Latency = 5;
1798 let NumMicroOps = 2;
1799 let ResourceCycles = [1, 1];
1800}
1801def : InstRW<[WriteCVTSS2SDrm], (instregex "(Int_)?(V?)CVTSS2SDrm")>;
1802
1803// CVTDQ2PD.
1804// x,x.
1805def : InstRW<[WriteP1_P5_Lat4], (instregex "(V)?CVTDQ2PDrr")>;
1806
1807// y,x.
1808def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVTDQ2PDYrr")>;
1809
1810// CVT(T)PD2DQ.
1811// x,x.
1812def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVT(T?)PD2DQrr")>;
1813// x,m128.
1814def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVT(T?)PD2DQrm")>;
1815// x,y.
1816def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVT(T?)PD2DQYrr")>;
1817// x,m256.
1818def : InstRW<[WriteP1_P5_Lat6Ld], (instregex "VCVT(T?)PD2DQYrm")>;
1819
1820// CVT(T)PS2PI.
1821// mm,x.
1822def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PS2PIirr")>;
1823
1824// CVTPI2PD.
1825// x,mm.
1826def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PI2PDirr")>;
1827
1828// CVT(T)PD2PI.
1829// mm,x.
1830def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PD2PIirr")>;
1831
// CVTSI2SS.
1833// x,r32.
1834def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V?)CVT(T?)SI2SS(64)?rr")>;
1835
1836// CVT(T)SS2SI.
1837// r32,x.
1838def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rr")>;
1839// r32,m32.
1840def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rm")>;
1841
// CVTSI2SD.
// x,r32/64.
// The pattern must name the SD opcode: the SS spelling is already handled by
// the CVTSI2SS entry, and using it here leaves CVTSI2SD without a mapping.
def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVTSI2SD(64)?rr")>;
1845
1846// CVTSD2SI.
1847// r32/64
1848def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rr")>;
// r32/64,m64.
1850def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rm")>;
1851
1852// VCVTPS2PH.
1853// x,v,i.
1854def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPS2PH(Y?)rr")>;
1855// m,v,i.
1856def : InstRW<[WriteP1_P5_Lat4Ld, WriteRMW], (instregex "VCVTPS2PH(Y?)mr")>;
1857
1858// VCVTPH2PS.
1859// v,x.
1860def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPH2PS(Y?)rr")>;
1861
//-- Arithmetic instructions --//
1863
// HADD, HSUB PS/PD
// x,x / v,v,v.
// Latency 5, 3 micro-ops: 1 cycle on HWPort1 and 2 cycles on HWPort5.
def WriteHADDSUBPr : SchedWriteRes<[HWPort1, HWPort5]> {
  let ResourceCycles = [1, 2];
  let NumMicroOps = 3;
  let Latency = 5;
}
def : InstRW<[WriteHADDSUBPr],
             (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rr")>;
1872
// x,m / v,v,m.
// Latency 9, 4 micro-ops: 1 cycle on HWPort1, 2 on HWPort5 and 1 on HWPort23.
def WriteHADDSUBPm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
  let ResourceCycles = [1, 2, 1];
  let NumMicroOps = 4;
  let Latency = 9;
}
def : InstRW<[WriteHADDSUBPm],
             (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rm")>;
1880
// MUL SS/SD PS/PD.
1882// x,x / v,v,v.
1883def WriteMULr : SchedWriteRes<[HWPort01]> {
1884 let Latency = 5;
1885}
1886def : InstRW<[WriteMULr], (instregex "(V?)MUL(P|S)(S|D)rr")>;
1887
1888// x,m / v,v,m.
1889def WriteMULm : SchedWriteRes<[HWPort01, HWPort23]> {
1890 let Latency = 4;
1891 let NumMicroOps = 2;
1892 let ResourceCycles = [1, 1];
1893}
1894def : InstRW<[WriteMULm], (instregex "(V?)MUL(P|S)(S|D)rm")>;
1895
1896// VDIVPS.
1897// y,y,y.
1898def WriteVDIVPSYrr : SchedWriteRes<[HWPort0, HWPort15]> {
1899 let Latency = 19; // 18-21 cycles.
1900 let NumMicroOps = 3;
1901 let ResourceCycles = [2, 1];
1902}
1903def : InstRW<[WriteVDIVPSYrr], (instregex "VDIVPSYrr")>;
1904
1905// y,y,m256.
1906def WriteVDIVPSYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
1907 let Latency = 23; // 18-21 + 4 cycles.
1908 let NumMicroOps = 4;
1909 let ResourceCycles = [2, 1, 1];
1910}
1911def : InstRW<[WriteVDIVPSYrm, ReadAfterLd], (instregex "VDIVPSYrm")>;
1912
1913// VDIVPD.
1914// y,y,y.
1915def WriteVDIVPDYrr : SchedWriteRes<[HWPort0, HWPort15]> {
1916 let Latency = 27; // 19-35 cycles.
1917 let NumMicroOps = 3;
1918 let ResourceCycles = [2, 1];
1919}
1920def : InstRW<[WriteVDIVPDYrr], (instregex "VDIVPDYrr")>;
1921
1922// y,y,m256.
1923def WriteVDIVPDYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
1924 let Latency = 31; // 19-35 + 4 cycles.
1925 let NumMicroOps = 4;
1926 let ResourceCycles = [2, 1, 1];
1927}
1928def : InstRW<[WriteVDIVPDYrm, ReadAfterLd], (instregex "VDIVPDYrm")>;
1929
1930// VRCPPS.
1931// y,y.
1932def WriteVRCPPSr : SchedWriteRes<[HWPort0, HWPort15]> {
1933 let Latency = 7;
1934 let NumMicroOps = 3;
1935 let ResourceCycles = [2, 1];
1936}
1937def : InstRW<[WriteVRCPPSr], (instregex "VRCPPSYr(_Int)?")>;
1938
1939// y,m256.
1940def WriteVRCPPSm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
1941 let Latency = 11;
1942 let NumMicroOps = 4;
1943 let ResourceCycles = [2, 1, 1];
1944}
1945def : InstRW<[WriteVRCPPSm], (instregex "VRCPPSYm(_Int)?")>;
1946
1947// ROUND SS/SD PS/PD.
1948// v,v,i.
1949def WriteROUNDr : SchedWriteRes<[HWPort1]> {
1950 let Latency = 6;
1951 let NumMicroOps = 2;
1952 let ResourceCycles = [2];
1953}
1954def : InstRW<[WriteROUNDr], (instregex "(V?)ROUND(Y?)(S|P)(S|D)r(_Int)?")>;
1955
1956// v,m,i.
1957def WriteROUNDm : SchedWriteRes<[HWPort1, HWPort23]> {
1958 let Latency = 10;
1959 let NumMicroOps = 3;
1960 let ResourceCycles = [2, 1];
1961}
1962def : InstRW<[WriteROUNDm], (instregex "(V?)ROUND(Y?)(S|P)(S|D)m(_Int)?")>;
1963
1964// DPPS.
1965// x,x,i / v,v,v,i.
1966def WriteDPPSr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> {
1967 let Latency = 14;
1968 let NumMicroOps = 4;
1969 let ResourceCycles = [2, 1, 1];
1970}
1971def : InstRW<[WriteDPPSr], (instregex "(V?)DPPS(Y?)rri")>;
1972
1973// x,m,i / v,v,m,i.
1974def WriteDPPSm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23, HWPort6]> {
1975 let Latency = 18;
1976 let NumMicroOps = 6;
1977 let ResourceCycles = [2, 1, 1, 1, 1];
1978}
1979def : InstRW<[WriteDPPSm, ReadAfterLd], (instregex "(V?)DPPS(Y?)rmi")>;
1980
1981// DPPD.
1982// x,x,i.
1983def WriteDPPDr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> {
1984 let Latency = 9;
1985 let NumMicroOps = 3;
1986 let ResourceCycles = [1, 1, 1];
1987}
1988def : InstRW<[WriteDPPDr], (instregex "(V?)DPPDrri")>;
1989
1990// x,m,i.
1991def WriteDPPDm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23]> {
1992 let Latency = 13;
1993 let NumMicroOps = 4;
1994 let ResourceCycles = [1, 1, 1, 1];
1995}
1996def : InstRW<[WriteDPPDm], (instregex "(V?)DPPDrmi")>;
1997
// VFMADD.
// v,v,v.
// Register forms: latency 5, a single micro-op on HWPort01.
def WriteFMADDr : SchedWriteRes<[HWPort01]> {
  let Latency = 5;
  let NumMicroOps = 1;
}
def : InstRW<[WriteFMADDr],
   (instregex
     // 3p forms.
     "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?",
     // 3s forms. Every operand-order alternative carries the "r" prefix, as
     // in the 3p pattern above; without it the r231/r213 opcodes never match.
     "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r",
     // 4s/4s_int forms.
     "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?",
     // 4p forms.
     "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>;
2014
// v,v,m.
// Memory forms: latency 9, 2 micro-ops, 1 cycle on each of HWPort01 and
// HWPort23.
def WriteFMADDm : SchedWriteRes<[HWPort01, HWPort23]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteFMADDm],
   (instregex
     // 3p forms.
     "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?",
     // 3s forms. Every operand-order alternative carries the "r" prefix, as
     // in the 3p pattern above; without it the r231/r213 opcodes never match.
     "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m",
     // 4s/4s_int forms.
     "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?",
     // 4p forms.
     "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>;
2031
//-- Math instructions --//
2033
// VSQRTPS.
// y,y.
// Latency 19, 3 micro-ops: 2 cycles on HWPort0 and 1 cycle on HWPort15.
def WriteVSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> {
  let ResourceCycles = [2, 1];
  let NumMicroOps = 3;
  let Latency = 19;
}
def : InstRW<[WriteVSQRTPSYr],
             (instregex "VSQRTPSYr")>;
2042
// y,m256.
// Latency 23, 4 micro-ops: 2 cycles on HWPort0, 1 each on HWPort15 and
// HWPort23.
def WriteVSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let ResourceCycles = [2, 1, 1];
  let NumMicroOps = 4;
  let Latency = 23;
}
def : InstRW<[WriteVSQRTPSYm],
             (instregex "VSQRTPSYm")>;
2050
2051// VSQRTPD.
2052// y,y.
2053def WriteVSQRTPDYr : SchedWriteRes<[HWPort0, HWPort15]> {
2054 let Latency = 28;
2055 let NumMicroOps = 3;
2056 let ResourceCycles = [2, 1];
2057}
2058def : InstRW<[WriteVSQRTPDYr], (instregex "VSQRTPDYr")>;
2059
2060// y,m256.
2061def WriteVSQRTPDYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
2062 let Latency = 32;
2063 let NumMicroOps = 4;
2064 let ResourceCycles = [2, 1, 1];
2065}
2066def : InstRW<[WriteVSQRTPDYm], (instregex "VSQRTPDYm")>;
2067
// RSQRT SS/PS.
// x,x.
// Latency 5 on HWPort0.
let Latency = 5 in
def WriteRSQRTr : SchedWriteRes<[HWPort0]>;
def : InstRW<[WriteRSQRTr],
             (instregex "(V?)RSQRT(SS|PS)r(_Int)?")>;
2074
// x,m128.
// Latency 9, 2 micro-ops: 1 cycle on each of HWPort0 and HWPort23.
def WriteRSQRTm : SchedWriteRes<[HWPort0, HWPort23]> {
  let ResourceCycles = [1, 1];
  let NumMicroOps = 2;
  let Latency = 9;
}
def : InstRW<[WriteRSQRTm],
             (instregex "(V?)RSQRT(SS|PS)m(_Int)?")>;
2082
2083// RSQRTPS 256.
2084// y,y.
2085def WriteRSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> {
2086 let Latency = 7;
2087 let NumMicroOps = 3;
2088 let ResourceCycles = [2, 1];
2089}
2090def : InstRW<[WriteRSQRTPSYr], (instregex "VRSQRTPSYr(_Int)?")>;
2091
2092// y,m256.
2093def WriteRSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
2094 let Latency = 11;
2095 let NumMicroOps = 4;
2096 let ResourceCycles = [2, 1, 1];
2097}
2098def : InstRW<[WriteRSQRTPSYm], (instregex "VRSQRTPSYm(_Int)?")>;
2099
Nadav Roteme7b6a8a2013-03-28 22:34:46 +00002100} // SchedModel