Blame - llvm/lib/Target/X86/X86SchedHaswell.td - toolchain/llvm-project

blob: 62ba2bc1c9ed27ff7e739d0b6e8277dee7753ad6 [file] [log] [blame]

Nadav Rotem	e7b6a8a	2013-03-28 22:34:46 +0000	[diff] [blame]	1	//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file defines the machine model for Haswell to support instruction
				11	// scheduling and other instruction cost heuristics.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	def HaswellModel : SchedMachineModel {
				16	// All x86 instructions are modeled as a single micro-op, and HW can decode 4
				17	// instructions per cycle.
				18	let IssueWidth = 4;
Andrew Trick	18dc3da	2013-06-15 04:50:02 +0000	[diff] [blame]	19	let MicroOpBufferSize = 192; // Based on the reorder buffer.
Nadav Rotem	e7b6a8a	2013-03-28 22:34:46 +0000	[diff] [blame]	20	let LoadLatency = 4;
Nadav Rotem	e7b6a8a	2013-03-28 22:34:46 +0000	[diff] [blame]	21	let MispredictPenalty = 16;
				22	}
				23
				24	let SchedModel = HaswellModel in {
				25
				26	// Haswell can issue micro-ops to 8 different ports in one cycle.
				27
				28	// Ports 0, 1, 5 and 6 handle all computation.
				29	// Port 4 gets the data half of stores. Store data can be available later than
				30	// the store address, but since we don't model the latency of stores, we can
				31	// ignore that.
				32	// Ports 2 and 3 are identical. They handle loads and the address half of
				33	// stores. Port 7 can also handle the address half of stores, but not loads.
				34	def HWPort0 : ProcResource<1>;
				35	def HWPort1 : ProcResource<1>;
				36	def HWPort2 : ProcResource<1>;
				37	def HWPort3 : ProcResource<1>;
				38	def HWPort4 : ProcResource<1>;
				39	def HWPort5 : ProcResource<1>;
				40	def HWPort6 : ProcResource<1>;
				41	def HWPort7 : ProcResource<1>;
				42
				43	// Many micro-ops are capable of issuing on multiple ports.
				44	def HWPort23 : ProcResGroup<[HWPort2, HWPort3]>;
				45	def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
				46	def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>;
				47	def HWPort056 : ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
				48	def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>;
				49	def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
				50	def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
				51
Andrew Trick	40c4f38	2013-06-15 04:50:06 +0000	[diff] [blame]	52	// 60 Entry Unified Scheduler
				53	def HWPortAny : ProcResGroup<[HWPort0, HWPort1, HWPort2, HWPort3, HWPort4,
				54	HWPort5, HWPort6, HWPort7]> {
				55	let BufferSize=60;
				56	}
				57
Andrew Trick	e1d88cf	2013-04-02 01:58:47 +0000	[diff] [blame]	58	// Integer division issued on port 0.
				59	def HWDivider : ProcResource<1>;
Nadav Rotem	e7b6a8a	2013-03-28 22:34:46 +0000	[diff] [blame]	60
				61	// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
				62	// cycles after the memory operand.
				63	def : ReadAdvance<ReadAfterLd, 4>;
				64
				65	// Many SchedWrites are defined in pairs with and without a folded load.
				66	// Instructions with folded loads are usually micro-fused, so they only appear
				67	// as two micro-ops when queued in the reservation station.
				68	// This multiclass defines the resource usage for variants with and without
				69	// folded loads.
				70	multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
				71	ProcResourceKind ExePort,
				72	int Lat> {
				73	// Register variant is using a single cycle on ExePort.
				74	def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
				75
				76	// Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
				77	// latency.
				78	def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> {
				79	let Latency = !add(Lat, 4);
				80	}
				81	}
				82
				83	// A folded store needs a cycle on port 4 for the store data, but it does not
				84	// need an extra port 2/3 cycle to recompute the address.
				85	def : WriteRes<WriteRMW, [HWPort4]>;
				86
				87	def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
				88	def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 4; }
				89	def : WriteRes<WriteMove, [HWPort0156]>;
				90	def : WriteRes<WriteZero, []>;
				91
				92	defm : HWWriteResPair<WriteALU, HWPort0156, 1>;
				93	defm : HWWriteResPair<WriteIMul, HWPort1, 3>;
Andrew Trick	7201f4f	2013-06-21 18:33:04 +0000	[diff] [blame^]	94	def : WriteRes<WriteIMulH, []> { let Latency = 3; }
Nadav Rotem	e7b6a8a	2013-03-28 22:34:46 +0000	[diff] [blame]	95	defm : HWWriteResPair<WriteShift, HWPort056, 1>;
				96	defm : HWWriteResPair<WriteJump, HWPort5, 1>;
				97
				98	// This is for simple LEAs with one or two input operands.
				99	// The complex ones can only execute on port 1, and they require two cycles on
				100	// the port to read all inputs. We don't model that.
				101	def : WriteRes<WriteLEA, [HWPort15]>;
				102
				103	// This is quite rough; the actual latency depends on the dividend.
				104	def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> {
				105	let Latency = 25;
				106	let ResourceCycles = [1, 10];
				107	}
				108	def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> {
				109	let Latency = 29;
				110	let ResourceCycles = [1, 1, 10];
				111	}
				112
				113	// Scalar and vector floating point.
				114	defm : HWWriteResPair<WriteFAdd, HWPort1, 3>;
				115	defm : HWWriteResPair<WriteFMul, HWPort0, 5>;
				116	defm : HWWriteResPair<WriteFDiv, HWPort0, 12>; // 10-14 cycles.
				117	defm : HWWriteResPair<WriteFRcp, HWPort0, 5>;
				118	defm : HWWriteResPair<WriteFSqrt, HWPort0, 15>;
				119	defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>;
				120	defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>;
				121	defm : HWWriteResPair<WriteCvtF2F, HWPort1, 3>;
				122
				123	// Vector integer operations.
				124	defm : HWWriteResPair<WriteVecShift, HWPort05, 1>;
				125	defm : HWWriteResPair<WriteVecLogic, HWPort015, 1>;
				126	defm : HWWriteResPair<WriteVecALU, HWPort15, 1>;
				127	defm : HWWriteResPair<WriteVecIMul, HWPort0, 5>;
				128	defm : HWWriteResPair<WriteShuffle, HWPort15, 1>;
				129
				130	def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
				131	def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
				132	} // SchedModel