Blame - llvm/lib/Target/X86/X86TargetTransformInfo.cpp - toolchain/llvm-project

blob: 6ab08cbd12997aafac7f323a1aa2d8b7da29e4b3 [file] [log] [blame]

Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	1	//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	/// \file
				10	/// This file implements a TargetTransformInfo analysis pass specific to the
				11	/// X86 target machine. It uses the target's detailed information to provide
				12	/// more precise answers to certain TTI queries, while letting the target
				13	/// independent and default TTI implementations handle the rest.
				14	///
				15	//===----------------------------------------------------------------------===//
				16
				17	#define DEBUG_TYPE "x86tti"
				18	#include "X86.h"
				19	#include "X86TargetMachine.h"
Chandler Carruth	d3e7355	2013-01-07 03:08:10 +0000	[diff] [blame]	20	#include "llvm/Analysis/TargetTransformInfo.h"
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	21	#include "llvm/Support/Debug.h"
				22	#include "llvm/Target/TargetLowering.h"
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	23	using namespace llvm;
				24
// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
				28	namespace llvm {
				29	void initializeX86TTIPass(PassRegistry &);
				30	}
				31
				32	namespace {
				33
				34	class X86TTI : public ImmutablePass, public TargetTransformInfo {
				35	const X86TargetMachine *TM;
				36	const X86Subtarget *ST;
				37	const X86TargetLowering *TLI;
				38
				39	/// Estimate the overhead of scalarizing an instruction. Insert and Extract
				40	/// are set if the result needs to be inserted and/or extracted from vectors.
				41	unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
				42
				43	public:
				44	X86TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
				45	llvm_unreachable("This pass cannot be directly constructed");
				46	}
				47
				48	X86TTI(const X86TargetMachine *TM)
				49	: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
				50	TLI(TM->getTargetLowering()) {
				51	initializeX86TTIPass(*PassRegistry::getPassRegistry());
				52	}
				53
				54	virtual void initializePass() {
				55	pushTTIStack(this);
				56	}
				57
				58	virtual void finalizePass() {
				59	popTTIStack();
				60	}
				61
				62	virtual void getAnalysisUsage(AnalysisUsage &AU) const {
				63	TargetTransformInfo::getAnalysisUsage(AU);
				64	}
				65
				66	/// Pass identification.
				67	static char ID;
				68
				69	/// Provide necessary pointer adjustments for the two base classes.
				70	virtual void getAdjustedAnalysisPointer(const void ID) {
				71	if (ID == &TargetTransformInfo::ID)
				72	return (TargetTransformInfo*)this;
				73	return this;
				74	}
				75
				76	/// \name Scalar TTI Implementations
				77	/// @{
Chandler Carruth	50a36cd	2013-01-07 03:16:03 +0000	[diff] [blame]	78	virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	79
				80	/// @}
				81
				82	/// \name Vector TTI Implementations
				83	/// @{
				84
				85	virtual unsigned getNumberOfRegisters(bool Vector) const;
Nadav Rotem	b696c36	2013-01-09 01:15:42 +0000	[diff] [blame^]	86	virtual unsigned getMaximumUnrollFactor() const;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	87	virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
				88	virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
				89	int Index, Type *SubTp) const;
				90	virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
				91	Type *Src) const;
				92	virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
				93	Type *CondTy) const;
				94	virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
				95	unsigned Index) const;
				96	virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
				97	unsigned Alignment,
				98	unsigned AddressSpace) const;
				99
				100	/// @}
				101	};
				102
				103	} // end anonymous namespace
				104
				105	INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
				106	"X86 Target Transform Info", true, true, false)
				107	char X86TTI::ID = 0;
				108
				109	ImmutablePass *
				110	llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
				111	return new X86TTI(TM);
				112	}
				113
				114
				115	//===----------------------------------------------------------------------===//
				116	//
				117	// X86 cost model.
				118	//
				119	//===----------------------------------------------------------------------===//
				120
				121	namespace {
				122	struct X86CostTblEntry {
				123	int ISD;
				124	MVT Type;
				125	unsigned Cost;
				126	};
				127	}
				128
				129	static int
				130	FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
				131	for (unsigned int i = 0; i < len; ++i)
				132	if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
				133	return i;
				134
				135	// Could not find an entry.
				136	return -1;
				137	}
				138
				139	namespace {
				140	struct X86TypeConversionCostTblEntry {
				141	int ISD;
				142	MVT Dst;
				143	MVT Src;
				144	unsigned Cost;
				145	};
				146	}
				147
				148	static int
				149	FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
				150	int ISD, MVT Dst, MVT Src) {
				151	for (unsigned int i = 0; i < len; ++i)
				152	if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
				153	return i;
				154
				155	// Could not find an entry.
				156	return -1;
				157	}
				158
Chandler Carruth	50a36cd	2013-01-07 03:16:03 +0000	[diff] [blame]	159	X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	160	assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
				161	// TODO: Currently the __builtin_popcount() implementation using SSE3
				162	// instructions is inefficient. Once the problem is fixed, we should
				163	// call ST->hasSSE3() instead of ST->hasSSE4().
Chandler Carruth	50a36cd	2013-01-07 03:16:03 +0000	[diff] [blame]	164	return ST->hasSSE41() ? PSK_FastHardware : PSK_Software;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	165	}
				166
				167	unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
				168	if (ST->is64Bit())
				169	return 16;
				170	return 8;
				171	}
				172
Nadav Rotem	b696c36	2013-01-09 01:15:42 +0000	[diff] [blame^]	173	unsigned X86TTI::getMaximumUnrollFactor() const {
				174	if (ST->isAtom())
				175	return 1;
				176
				177	// Sandybridge and Haswell have multiple execution ports and pipelined
				178	// vector units.
				179	if (ST->hasAVX())
				180	return 4;
				181
				182	return 2;
				183	}
				184
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	185	unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
				186	// Legalize the type.
				187	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
				188
				189	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				190	assert(ISD && "Invalid opcode");
				191
				192	static const X86CostTblEntry AVX1CostTable[] = {
				193	// We don't have to scalarize unsupported ops. We can issue two half-sized
				194	// operations and we only need to extract the upper YMM half.
				195	// Two ops + 1 extract + 1 insert = 4.
				196	{ ISD::MUL, MVT::v8i32, 4 },
				197	{ ISD::SUB, MVT::v8i32, 4 },
				198	{ ISD::ADD, MVT::v8i32, 4 },
				199	{ ISD::MUL, MVT::v4i64, 4 },
				200	{ ISD::SUB, MVT::v4i64, 4 },
				201	{ ISD::ADD, MVT::v4i64, 4 },
				202	};
				203
				204	// Look for AVX1 lowering tricks.
				205	if (ST->hasAVX()) {
				206	int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
				207	LT.second);
				208	if (Idx != -1)
				209	return LT.first * AVX1CostTable[Idx].Cost;
				210	}
				211	// Fallback to the default implementation.
				212	return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty);
				213	}
				214
				215	unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
				216	Type *SubTp) const {
				217	// We only estimate the cost of reverse shuffles.
Chandler Carruth	2109f47	2013-01-07 03:20:02 +0000	[diff] [blame]	218	if (Kind != SK_Reverse)
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	219	return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
				220
				221	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
				222	unsigned Cost = 1;
				223	if (LT.second.getSizeInBits() > 128)
				224	Cost = 3; // Extract + insert + copy.
				225
				226	// Multiple by the number of parts.
				227	return Cost * LT.first;
				228	}
				229
				230	unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type Dst, Type Src) const {
				231	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				232	assert(ISD && "Invalid opcode");
				233
				234	EVT SrcTy = TLI->getValueType(Src);
				235	EVT DstTy = TLI->getValueType(Dst);
				236
				237	if (!SrcTy.isSimple() \|\| !DstTy.isSimple())
				238	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				239
				240	static const X86TypeConversionCostTblEntry AVXConversionTbl[] = {
				241	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
				242	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
				243	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
				244	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
				245	{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
				246	{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
				247	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
				248	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
				249	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
				250	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
				251	{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
				252	{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
				253	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
				254	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
				255	{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
				256	};
				257
				258	if (ST->hasAVX()) {
				259	int Idx = FindInConvertTable(AVXConversionTbl,
				260	array_lengthof(AVXConversionTbl),
				261	ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
				262	if (Idx != -1)
				263	return AVXConversionTbl[Idx].Cost;
				264	}
				265
				266	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				267	}
				268
				269	unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
				270	Type *CondTy) const {
				271	// Legalize the type.
				272	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
				273
				274	MVT MTy = LT.second;
				275
				276	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				277	assert(ISD && "Invalid opcode");
				278
				279	static const X86CostTblEntry SSE42CostTbl[] = {
				280	{ ISD::SETCC, MVT::v2f64, 1 },
				281	{ ISD::SETCC, MVT::v4f32, 1 },
				282	{ ISD::SETCC, MVT::v2i64, 1 },
				283	{ ISD::SETCC, MVT::v4i32, 1 },
				284	{ ISD::SETCC, MVT::v8i16, 1 },
				285	{ ISD::SETCC, MVT::v16i8, 1 },
				286	};
				287
				288	static const X86CostTblEntry AVX1CostTbl[] = {
				289	{ ISD::SETCC, MVT::v4f64, 1 },
				290	{ ISD::SETCC, MVT::v8f32, 1 },
				291	// AVX1 does not support 8-wide integer compare.
				292	{ ISD::SETCC, MVT::v4i64, 4 },
				293	{ ISD::SETCC, MVT::v8i32, 4 },
				294	{ ISD::SETCC, MVT::v16i16, 4 },
				295	{ ISD::SETCC, MVT::v32i8, 4 },
				296	};
				297
				298	static const X86CostTblEntry AVX2CostTbl[] = {
				299	{ ISD::SETCC, MVT::v4i64, 1 },
				300	{ ISD::SETCC, MVT::v8i32, 1 },
				301	{ ISD::SETCC, MVT::v16i16, 1 },
				302	{ ISD::SETCC, MVT::v32i8, 1 },
				303	};
				304
				305	if (ST->hasAVX2()) {
				306	int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
				307	if (Idx != -1)
				308	return LT.first * AVX2CostTbl[Idx].Cost;
				309	}
				310
				311	if (ST->hasAVX()) {
				312	int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
				313	if (Idx != -1)
				314	return LT.first * AVX1CostTbl[Idx].Cost;
				315	}
				316
				317	if (ST->hasSSE42()) {
				318	int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
				319	if (Idx != -1)
				320	return LT.first * SSE42CostTbl[Idx].Cost;
				321	}
				322
				323	return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
				324	}
				325
				326	unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
				327	unsigned Index) const {
				328	assert(Val->isVectorTy() && "This must be a vector type");
				329
				330	if (Index != -1U) {
				331	// Legalize the type.
				332	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
				333
				334	// This type is legalized to a scalar type.
				335	if (!LT.second.isVector())
				336	return 0;
				337
				338	// The type may be split. Normalize the index to the new type.
				339	unsigned Width = LT.second.getVectorNumElements();
				340	Index = Index % Width;
				341
				342	// Floating point scalars are already located in index #0.
				343	if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
				344	return 0;
				345	}
				346
				347	return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
				348	}
				349
				350	unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				351	unsigned AddressSpace) const {
				352	// Legalize the type.
				353	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
				354	assert((Opcode == Instruction::Load \|\| Opcode == Instruction::Store) &&
				355	"Invalid Opcode");
				356
				357	// Each load/store unit costs 1.
				358	unsigned Cost = LT.first * 1;
				359
				360	// On Sandybridge 256bit load/stores are double pumped
				361	// (but not on Haswell).
				362	if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
				363	Cost*=2;
				364
				365	return Cost;
				366	}