Blame - llvm/lib/Target/X86/X86TargetTransformInfo.cpp - toolchain/llvm-project

blob: 8a699afa6a40686ec7915eaf0a84c2735be64cf3 [file] [log] [blame]

Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	1	//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	/// \file
				10	/// This file implements a TargetTransformInfo analysis pass specific to the
				11	/// X86 target machine. It uses the target's detailed information to provide
				12	/// more precise answers to certain TTI queries, while letting the target
				13	/// independent and default TTI implementations handle the rest.
				14	///
				15	//===----------------------------------------------------------------------===//
				16
				17	#define DEBUG_TYPE "x86tti"
				18	#include "X86.h"
				19	#include "X86TargetMachine.h"
Chandler Carruth	d3e7355	2013-01-07 03:08:10 +0000	[diff] [blame]	20	#include "llvm/Analysis/TargetTransformInfo.h"
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	21	#include "llvm/Support/Debug.h"
				22	#include "llvm/Target/TargetLowering.h"
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	23	using namespace llvm;
				24
				25	// Declare the pass initialization routine locally as target-specific passes
				26	// don't have a target-wide initialization entry point, and so we rely on the
				27	// pass constructor initialization.
				28	namespace llvm {
				29	void initializeX86TTIPass(PassRegistry &);
				30	}
				31
				32	namespace {
				33
				34	class X86TTI : public ImmutablePass, public TargetTransformInfo {
				35	const X86TargetMachine *TM;
				36	const X86Subtarget *ST;
				37	const X86TargetLowering *TLI;
				38
				39	/// Estimate the overhead of scalarizing an instruction. Insert and Extract
				40	/// are set if the result needs to be inserted and/or extracted from vectors.
				41	unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
				42
				43	public:
				44	X86TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
				45	llvm_unreachable("This pass cannot be directly constructed");
				46	}
				47
				48	X86TTI(const X86TargetMachine *TM)
				49	: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
				50	TLI(TM->getTargetLowering()) {
				51	initializeX86TTIPass(*PassRegistry::getPassRegistry());
				52	}
				53
				54	virtual void initializePass() {
				55	pushTTIStack(this);
				56	}
				57
				58	virtual void finalizePass() {
				59	popTTIStack();
				60	}
				61
				62	virtual void getAnalysisUsage(AnalysisUsage &AU) const {
				63	TargetTransformInfo::getAnalysisUsage(AU);
				64	}
				65
				66	/// Pass identification.
				67	static char ID;
				68
				69	/// Provide necessary pointer adjustments for the two base classes.
				70	virtual void getAdjustedAnalysisPointer(const void ID) {
				71	if (ID == &TargetTransformInfo::ID)
				72	return (TargetTransformInfo*)this;
				73	return this;
				74	}
				75
				76	/// \name Scalar TTI Implementations
				77	/// @{
				78
Chandler Carruth	50a36cd	2013-01-07 03:16:03 +0000	[diff] [blame^]	79	virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	80
				81	/// @}
				82
				83	/// \name Vector TTI Implementations
				84	/// @{
				85
				86	virtual unsigned getNumberOfRegisters(bool Vector) const;
				87	virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
				88	virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
				89	int Index, Type *SubTp) const;
				90	virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
				91	Type *Src) const;
				92	virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
				93	Type *CondTy) const;
				94	virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
				95	unsigned Index) const;
				96	virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
				97	unsigned Alignment,
				98	unsigned AddressSpace) const;
				99
				100	/// @}
				101	};
				102
				103	} // end anonymous namespace
				104
				105	INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
				106	"X86 Target Transform Info", true, true, false)
				107	char X86TTI::ID = 0;
				108
				109	ImmutablePass *
				110	llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
				111	return new X86TTI(TM);
				112	}
				113
				114
				115	//===----------------------------------------------------------------------===//
				116	//
				117	// X86 cost model.
				118	//
				119	//===----------------------------------------------------------------------===//
				120
				121	namespace {
				122	struct X86CostTblEntry {
				123	int ISD;
				124	MVT Type;
				125	unsigned Cost;
				126	};
				127	}
				128
				129	static int
				130	FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
				131	for (unsigned int i = 0; i < len; ++i)
				132	if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
				133	return i;
				134
				135	// Could not find an entry.
				136	return -1;
				137	}
				138
				139	namespace {
				140	struct X86TypeConversionCostTblEntry {
				141	int ISD;
				142	MVT Dst;
				143	MVT Src;
				144	unsigned Cost;
				145	};
				146	}
				147
				148	static int
				149	FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
				150	int ISD, MVT Dst, MVT Src) {
				151	for (unsigned int i = 0; i < len; ++i)
				152	if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
				153	return i;
				154
				155	// Could not find an entry.
				156	return -1;
				157	}
				158
				159
Chandler Carruth	50a36cd	2013-01-07 03:16:03 +0000	[diff] [blame^]	160	X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	161	assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
				162	// TODO: Currently the __builtin_popcount() implementation using SSE3
				163	// instructions is inefficient. Once the problem is fixed, we should
				164	// call ST->hasSSE3() instead of ST->hasSSE4().
Chandler Carruth	50a36cd	2013-01-07 03:16:03 +0000	[diff] [blame^]	165	return ST->hasSSE41() ? PSK_FastHardware : PSK_Software;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	166	}
				167
				168	unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
				169	if (ST->is64Bit())
				170	return 16;
				171	return 8;
				172	}
				173
				174	unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
				175	// Legalize the type.
				176	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
				177
				178	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				179	assert(ISD && "Invalid opcode");
				180
				181	static const X86CostTblEntry AVX1CostTable[] = {
				182	// We don't have to scalarize unsupported ops. We can issue two half-sized
				183	// operations and we only need to extract the upper YMM half.
				184	// Two ops + 1 extract + 1 insert = 4.
				185	{ ISD::MUL, MVT::v8i32, 4 },
				186	{ ISD::SUB, MVT::v8i32, 4 },
				187	{ ISD::ADD, MVT::v8i32, 4 },
				188	{ ISD::MUL, MVT::v4i64, 4 },
				189	{ ISD::SUB, MVT::v4i64, 4 },
				190	{ ISD::ADD, MVT::v4i64, 4 },
				191	};
				192
				193	// Look for AVX1 lowering tricks.
				194	if (ST->hasAVX()) {
				195	int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
				196	LT.second);
				197	if (Idx != -1)
				198	return LT.first * AVX1CostTable[Idx].Cost;
				199	}
				200	// Fallback to the default implementation.
				201	return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty);
				202	}
				203
				204	unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
				205	Type *SubTp) const {
				206	// We only estimate the cost of reverse shuffles.
				207	if (Kind != Reverse)
				208	return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
				209
				210	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
				211	unsigned Cost = 1;
				212	if (LT.second.getSizeInBits() > 128)
				213	Cost = 3; // Extract + insert + copy.
				214
				215	// Multiple by the number of parts.
				216	return Cost * LT.first;
				217	}
				218
				219	unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type Dst, Type Src) const {
				220	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				221	assert(ISD && "Invalid opcode");
				222
				223	EVT SrcTy = TLI->getValueType(Src);
				224	EVT DstTy = TLI->getValueType(Dst);
				225
				226	if (!SrcTy.isSimple() \|\| !DstTy.isSimple())
				227	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				228
				229	static const X86TypeConversionCostTblEntry AVXConversionTbl[] = {
				230	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
				231	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
				232	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
				233	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
				234	{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
				235	{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
				236	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
				237	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
				238	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
				239	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
				240	{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
				241	{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
				242	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
				243	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
				244	{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
				245	};
				246
				247	if (ST->hasAVX()) {
				248	int Idx = FindInConvertTable(AVXConversionTbl,
				249	array_lengthof(AVXConversionTbl),
				250	ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
				251	if (Idx != -1)
				252	return AVXConversionTbl[Idx].Cost;
				253	}
				254
				255	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				256	}
				257
				258	unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
				259	Type *CondTy) const {
				260	// Legalize the type.
				261	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
				262
				263	MVT MTy = LT.second;
				264
				265	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				266	assert(ISD && "Invalid opcode");
				267
				268	static const X86CostTblEntry SSE42CostTbl[] = {
				269	{ ISD::SETCC, MVT::v2f64, 1 },
				270	{ ISD::SETCC, MVT::v4f32, 1 },
				271	{ ISD::SETCC, MVT::v2i64, 1 },
				272	{ ISD::SETCC, MVT::v4i32, 1 },
				273	{ ISD::SETCC, MVT::v8i16, 1 },
				274	{ ISD::SETCC, MVT::v16i8, 1 },
				275	};
				276
				277	static const X86CostTblEntry AVX1CostTbl[] = {
				278	{ ISD::SETCC, MVT::v4f64, 1 },
				279	{ ISD::SETCC, MVT::v8f32, 1 },
				280	// AVX1 does not support 8-wide integer compare.
				281	{ ISD::SETCC, MVT::v4i64, 4 },
				282	{ ISD::SETCC, MVT::v8i32, 4 },
				283	{ ISD::SETCC, MVT::v16i16, 4 },
				284	{ ISD::SETCC, MVT::v32i8, 4 },
				285	};
				286
				287	static const X86CostTblEntry AVX2CostTbl[] = {
				288	{ ISD::SETCC, MVT::v4i64, 1 },
				289	{ ISD::SETCC, MVT::v8i32, 1 },
				290	{ ISD::SETCC, MVT::v16i16, 1 },
				291	{ ISD::SETCC, MVT::v32i8, 1 },
				292	};
				293
				294	if (ST->hasAVX2()) {
				295	int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
				296	if (Idx != -1)
				297	return LT.first * AVX2CostTbl[Idx].Cost;
				298	}
				299
				300	if (ST->hasAVX()) {
				301	int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
				302	if (Idx != -1)
				303	return LT.first * AVX1CostTbl[Idx].Cost;
				304	}
				305
				306	if (ST->hasSSE42()) {
				307	int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
				308	if (Idx != -1)
				309	return LT.first * SSE42CostTbl[Idx].Cost;
				310	}
				311
				312	return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
				313	}
				314
				315	unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
				316	unsigned Index) const {
				317	assert(Val->isVectorTy() && "This must be a vector type");
				318
				319	if (Index != -1U) {
				320	// Legalize the type.
				321	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
				322
				323	// This type is legalized to a scalar type.
				324	if (!LT.second.isVector())
				325	return 0;
				326
				327	// The type may be split. Normalize the index to the new type.
				328	unsigned Width = LT.second.getVectorNumElements();
				329	Index = Index % Width;
				330
				331	// Floating point scalars are already located in index #0.
				332	if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
				333	return 0;
				334	}
				335
				336	return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
				337	}
				338
				339	unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				340	unsigned AddressSpace) const {
				341	// Legalize the type.
				342	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
				343	assert((Opcode == Instruction::Load \|\| Opcode == Instruction::Store) &&
				344	"Invalid Opcode");
				345
				346	// Each load/store unit costs 1.
				347	unsigned Cost = LT.first * 1;
				348
				349	// On Sandybridge 256bit load/stores are double pumped
				350	// (but not on Haswell).
				351	if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
				352	Cost*=2;
				353
				354	return Cost;
				355	}