Blame - llvm/lib/Target/X86/X86TargetTransformInfo.cpp - toolchain/llvm-project

blob: bed78ac8ab9675cb9339eaab6b076ed5e5b39041 [file] [log] [blame]

Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	1	//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	/// \file
				10	/// This file implements a TargetTransformInfo analysis pass specific to the
				11	/// X86 target machine. It uses the target's detailed information to provide
				12	/// more precise answers to certain TTI queries, while letting the target
				13	/// independent and default TTI implementations handle the rest.
				14	///
				15	//===----------------------------------------------------------------------===//
				16
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	17	#include "X86.h"
				18	#include "X86TargetMachine.h"
Chandler Carruth	d3e7355	2013-01-07 03:08:10 +0000	[diff] [blame]	19	#include "llvm/Analysis/TargetTransformInfo.h"
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	20	#include "llvm/IR/IntrinsicInst.h"
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	21	#include "llvm/Support/Debug.h"
Renato Golin	d4c392e	2013-01-24 23:01:00 +0000	[diff] [blame]	22	#include "llvm/Target/CostTable.h"
Chandler Carruth	8a8cd2b	2014-01-07 11:48:04 +0000	[diff] [blame]	23	#include "llvm/Target/TargetLowering.h"
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	24	using namespace llvm;
				25
Chandler Carruth	84e68b2	2014-04-22 02:41:26 +0000	[diff] [blame]	26	#define DEBUG_TYPE "x86tti"
				27
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	28	// Declare the pass initialization routine locally as target-specific passes
Eric Christopher	89f1880	2014-05-22 01:21:44 +0000	[diff] [blame]	29	// don't have a target-wide initialization entry point, and so we rely on the
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	30	// pass constructor initialization.
				31	namespace llvm {
				32	void initializeX86TTIPass(PassRegistry &);
				33	}
				34
				35	namespace {
				36
Craig Topper	77dfe45	2014-03-02 08:08:51 +0000	[diff] [blame]	37	class X86TTI final : public ImmutablePass, public TargetTransformInfo {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	38	const X86Subtarget *ST;
				39	const X86TargetLowering *TLI;
				40
				41	/// Estimate the overhead of scalarizing an instruction. Insert and Extract
				42	/// are set if the result needs to be inserted and/or extracted from vectors.
				43	unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
				44
				45	public:
Craig Topper	062a2ba	2014-04-25 05:30:21 +0000	[diff] [blame]	46	X86TTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	47	llvm_unreachable("This pass cannot be directly constructed");
				48	}
				49
				50	X86TTI(const X86TargetMachine *TM)
Eric Christopher	d913448	2014-08-04 21:25:23 +0000	[diff] [blame]	51	: ImmutablePass(ID), ST(TM->getSubtargetImpl()),
				52	TLI(TM->getSubtargetImpl()->getTargetLowering()) {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	53	initializeX86TTIPass(*PassRegistry::getPassRegistry());
				54	}
				55
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	56	void initializePass() override {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	57	pushTTIStack(this);
				58	}
				59
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	60	void getAnalysisUsage(AnalysisUsage &AU) const override {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	61	TargetTransformInfo::getAnalysisUsage(AU);
				62	}
				63
				64	/// Pass identification.
				65	static char ID;
				66
				67	/// Provide necessary pointer adjustments for the two base classes.
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	68	void getAdjustedAnalysisPointer(const void ID) override {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	69	if (ID == &TargetTransformInfo::ID)
				70	return (TargetTransformInfo*)this;
				71	return this;
				72	}
				73
				74	/// \name Scalar TTI Implementations
				75	/// @{
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	76	PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	77
				78	/// @}
				79
				80	/// \name Vector TTI Implementations
				81	/// @{
				82
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	83	unsigned getNumberOfRegisters(bool Vector) const override;
				84	unsigned getRegisterBitWidth(bool Vector) const override;
Sanjay Patel	b653de1	2014-09-10 17:58:16 +0000	[diff] [blame]	85	unsigned getMaxInterleaveFactor() const override;
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	86	unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
Karthik Bhat	7f33ff7	2014-08-25 04:56:54 +0000	[diff] [blame]	87	OperandValueKind, OperandValueProperties,
				88	OperandValueProperties) const override;
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	89	unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
				90	int Index, Type *SubTp) const override;
				91	unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
				92	Type *Src) const override;
				93	unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
				94	Type *CondTy) const override;
				95	unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
				96	unsigned Index) const override;
				97	unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				98	unsigned AddressSpace) const override;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	99
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	100	unsigned getAddressComputationCost(Type *PtrTy,
				101	bool IsComplex) const override;
Arnold Schwaighofer	6042a26	2013-07-12 19:16:07 +0000	[diff] [blame]	102
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	103	unsigned getReductionCost(unsigned Opcode, Type *Ty,
				104	bool IsPairwiseForm) const override;
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	105
Juergen Ributzka	b2e4edb	2014-06-10 00:32:29 +0000	[diff] [blame]	106	unsigned getIntImmCost(int64_t) const;
				107
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	108	unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	109
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	110	unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	111	Type *Ty) const override;
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	112	unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	113	Type *Ty) const override;
Elena Demikhovsky	f1de34b	2014-12-04 09:40:44 +0000	[diff] [blame^]	114	bool isLegalPredicatedLoad (Type *DataType, int Consecutive) const override;
				115	bool isLegalPredicatedStore(Type *DataType, int Consecutive) const override;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	116
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	117	/// @}
				118	};
				119
				120	} // end anonymous namespace
				121
				122	INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
				123	"X86 Target Transform Info", true, true, false)
				124	char X86TTI::ID = 0;
				125
				126	ImmutablePass *
				127	llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
				128	return new X86TTI(TM);
				129	}
				130
				131
				132	//===----------------------------------------------------------------------===//
				133	//
				134	// X86 cost model.
				135	//
				136	//===----------------------------------------------------------------------===//
				137
Chandler Carruth	50a36cd	2013-01-07 03:16:03 +0000	[diff] [blame]	138	X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	139	assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
				140	// TODO: Currently the __builtin_popcount() implementation using SSE3
				141	// instructions is inefficient. Once the problem is fixed, we should
Craig Topper	0a63e1d	2013-09-08 00:47:31 +0000	[diff] [blame]	142	// call ST->hasSSE3() instead of ST->hasPOPCNT().
				143	return ST->hasPOPCNT() ? PSK_FastHardware : PSK_Software;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	144	}
				145
				146	unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
Nadav Rotem	b1791a7	2013-01-09 22:29:00 +0000	[diff] [blame]	147	if (Vector && !ST->hasSSE1())
				148	return 0;
				149
Adam Nemet	2820a5b	2014-07-09 18:22:33 +0000	[diff] [blame]	150	if (ST->is64Bit()) {
				151	if (Vector && ST->hasAVX512())
				152	return 32;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	153	return 16;
Adam Nemet	2820a5b	2014-07-09 18:22:33 +0000	[diff] [blame]	154	}
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	155	return 8;
				156	}
				157
Nadav Rotem	b1791a7	2013-01-09 22:29:00 +0000	[diff] [blame]	158	unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
				159	if (Vector) {
Adam Nemet	2820a5b	2014-07-09 18:22:33 +0000	[diff] [blame]	160	if (ST->hasAVX512()) return 512;
Nadav Rotem	b1791a7	2013-01-09 22:29:00 +0000	[diff] [blame]	161	if (ST->hasAVX()) return 256;
				162	if (ST->hasSSE1()) return 128;
				163	return 0;
				164	}
				165
				166	if (ST->is64Bit())
				167	return 64;
				168	return 32;
				169
				170	}
				171
Sanjay Patel	b653de1	2014-09-10 17:58:16 +0000	[diff] [blame]	172	unsigned X86TTI::getMaxInterleaveFactor() const {
Nadav Rotem	b696c36	2013-01-09 01:15:42 +0000	[diff] [blame]	173	if (ST->isAtom())
				174	return 1;
				175
				176	// Sandybridge and Haswell have multiple execution ports and pipelined
				177	// vector units.
				178	if (ST->hasAVX())
				179	return 4;
				180
				181	return 2;
				182	}
				183
Karthik Bhat	7f33ff7	2014-08-25 04:56:54 +0000	[diff] [blame]	184	unsigned X86TTI::getArithmeticInstrCost(
				185	unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
				186	OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
				187	OperandValueProperties Opd2PropInfo) const {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	188	// Legalize the type.
				189	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
				190
				191	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				192	assert(ISD && "Invalid opcode");
				193
Karthik Bhat	7f33ff7	2014-08-25 04:56:54 +0000	[diff] [blame]	194	if (ISD == ISD::SDIV &&
				195	Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
				196	Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
				197	// On X86, vector signed division by constants power-of-two are
				198	// normally expanded to the sequence SRA + SRL + ADD + SRA.
				199	// The OperandValue properties many not be same as that of previous
				200	// operation;conservatively assume OP_None.
				201	unsigned Cost =
				202	2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
				203	TargetTransformInfo::OP_None,
				204	TargetTransformInfo::OP_None);
				205	Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
				206	TargetTransformInfo::OP_None,
				207	TargetTransformInfo::OP_None);
				208	Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info,
				209	TargetTransformInfo::OP_None,
				210	TargetTransformInfo::OP_None);
				211
				212	return Cost;
				213	}
				214
Benjamin Kramer	7c37227	2014-04-26 14:53:05 +0000	[diff] [blame]	215	static const CostTblEntry<MVT::SimpleValueType>
				216	AVX2UniformConstCostTable[] = {
				217	{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
				218	{ ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
				219	{ ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
				220	{ ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
				221	};
				222
				223	if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
				224	ST->hasAVX2()) {
				225	int Idx = CostTableLookup(AVX2UniformConstCostTable, ISD, LT.second);
				226	if (Idx != -1)
				227	return LT.first * AVX2UniformConstCostTable[Idx].Cost;
				228	}
				229
Elena Demikhovsky	2701247	2014-09-16 07:57:37 +0000	[diff] [blame]	230	static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = {
				231	{ ISD::SHL, MVT::v16i32, 1 },
				232	{ ISD::SRL, MVT::v16i32, 1 },
				233	{ ISD::SRA, MVT::v16i32, 1 },
				234	{ ISD::SHL, MVT::v8i64, 1 },
				235	{ ISD::SRL, MVT::v8i64, 1 },
				236	{ ISD::SRA, MVT::v8i64, 1 },
				237	};
				238
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	239	static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
Michael Liao	70dd7f9	2013-03-20 22:01:10 +0000	[diff] [blame]	240	// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
				241	// customize them to detect the cases where shift amount is a scalar one.
				242	{ ISD::SHL, MVT::v4i32, 1 },
				243	{ ISD::SRL, MVT::v4i32, 1 },
				244	{ ISD::SRA, MVT::v4i32, 1 },
				245	{ ISD::SHL, MVT::v8i32, 1 },
				246	{ ISD::SRL, MVT::v8i32, 1 },
				247	{ ISD::SRA, MVT::v8i32, 1 },
				248	{ ISD::SHL, MVT::v2i64, 1 },
				249	{ ISD::SRL, MVT::v2i64, 1 },
				250	{ ISD::SHL, MVT::v4i64, 1 },
				251	{ ISD::SRL, MVT::v4i64, 1 },
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	252
				253	{ ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
				254	{ ISD::SHL, MVT::v16i16, 16*10 }, // Scalarized.
				255
				256	{ ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
				257	{ ISD::SRL, MVT::v16i16, 8*10 }, // Scalarized.
				258
				259	{ ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
				260	{ ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized.
				261	{ ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
Arnold Schwaighofer	a04b9ef	2013-06-25 19:14:09 +0000	[diff] [blame]	262
				263	// Vectorizing division is a bad idea. See the SSE2 table for more comments.
				264	{ ISD::SDIV, MVT::v32i8, 32*20 },
				265	{ ISD::SDIV, MVT::v16i16, 16*20 },
				266	{ ISD::SDIV, MVT::v8i32, 8*20 },
				267	{ ISD::SDIV, MVT::v4i64, 4*20 },
				268	{ ISD::UDIV, MVT::v32i8, 32*20 },
				269	{ ISD::UDIV, MVT::v16i16, 16*20 },
				270	{ ISD::UDIV, MVT::v8i32, 8*20 },
				271	{ ISD::UDIV, MVT::v4i64, 4*20 },
Michael Liao	70dd7f9	2013-03-20 22:01:10 +0000	[diff] [blame]	272	};
				273
Elena Demikhovsky	2701247	2014-09-16 07:57:37 +0000	[diff] [blame]	274	if (ST->hasAVX512()) {
				275	int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
				276	if (Idx != -1)
				277	return LT.first * AVX512CostTable[Idx].Cost;
				278	}
Michael Liao	70dd7f9	2013-03-20 22:01:10 +0000	[diff] [blame]	279	// Look for AVX2 lowering tricks.
				280	if (ST->hasAVX2()) {
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	281	if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
				282	(Op2Info == TargetTransformInfo::OK_UniformConstantValue \|\|
				283	Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
				284	// On AVX2, a packed v16i16 shift left by a constant build_vector
				285	// is lowered into a vector multiply (vpmullw).
				286	return LT.first;
				287
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	288	int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
Michael Liao	70dd7f9	2013-03-20 22:01:10 +0000	[diff] [blame]	289	if (Idx != -1)
				290	return LT.first * AVX2CostTable[Idx].Cost;
				291	}
				292
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	293	static const CostTblEntry<MVT::SimpleValueType>
				294	SSE2UniformConstCostTable[] = {
Arnold Schwaighofer	44f902e	2013-04-04 23:26:24 +0000	[diff] [blame]	295	// We don't correctly identify costs of casts because they are marked as
				296	// custom.
				297	// Constant splats are cheaper for the following instructions.
				298	{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
				299	{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
				300	{ ISD::SHL, MVT::v4i32, 1 }, // pslld
				301	{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
				302
				303	{ ISD::SRL, MVT::v16i8, 1 }, // psrlw.
				304	{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
				305	{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
				306	{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
				307
				308	{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
				309	{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
				310	{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
Benjamin Kramer	7c37227	2014-04-26 14:53:05 +0000	[diff] [blame]	311
				312	{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
				313	{ ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
Benjamin Kramer	ce4b3fe	2014-04-27 18:47:54 +0000	[diff] [blame]	314	{ ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence
Benjamin Kramer	7c37227	2014-04-26 14:53:05 +0000	[diff] [blame]	315	{ ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
Arnold Schwaighofer	44f902e	2013-04-04 23:26:24 +0000	[diff] [blame]	316	};
				317
				318	if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
				319	ST->hasSSE2()) {
Benjamin Kramer	ce4b3fe	2014-04-27 18:47:54 +0000	[diff] [blame]	320	// pmuldq sequence.
				321	if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
				322	return LT.first * 15;
				323
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	324	int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second);
Arnold Schwaighofer	44f902e	2013-04-04 23:26:24 +0000	[diff] [blame]	325	if (Idx != -1)
				326	return LT.first * SSE2UniformConstCostTable[Idx].Cost;
				327	}
				328
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	329	if (ISD == ISD::SHL &&
				330	Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
				331	EVT VT = LT.second;
				332	if ((VT == MVT::v8i16 && ST->hasSSE2()) \|\|
				333	(VT == MVT::v4i32 && ST->hasSSE41()))
				334	// Vector shift left by non uniform constant can be lowered
				335	// into vector multiply (pmullw/pmulld).
				336	return LT.first;
				337	if (VT == MVT::v4i32 && ST->hasSSE2())
				338	// A vector shift left by non uniform constant is converted
				339	// into a vector multiply; the new multiply is eventually
				340	// lowered into a sequence of shuffles and 2 x pmuludq.
				341	ISD = ISD::MUL;
				342	}
Arnold Schwaighofer	44f902e	2013-04-04 23:26:24 +0000	[diff] [blame]	343
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	344	static const CostTblEntry<MVT::SimpleValueType> SSE2CostTable[] = {
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	345	// We don't correctly identify costs of casts because they are marked as
				346	// custom.
				347	// For some cases, where the shift amount is a scalar we would be able
				348	// to generate better code. Unfortunately, when this is the case the value
				349	// (the splat) will get hoisted out of the loop, thereby making it invisible
				350	// to ISel. The cost model must return worst case assumptions because it is
				351	// used for vectorization and we don't want to make vectorized code worse
				352	// than scalar code.
				353	{ ISD::SHL, MVT::v16i8, 30 }, // cmpeqb sequence.
				354	{ ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
				355	{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
				356	{ ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	357	{ ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	358
				359	{ ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
				360	{ ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
				361	{ ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
				362	{ ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
				363
				364	{ ISD::SRA, MVT::v16i8, 16*10 }, // Scalarized.
				365	{ ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized.
				366	{ ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
				367	{ ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
Arnold Schwaighofer	a04b9ef	2013-06-25 19:14:09 +0000	[diff] [blame]	368
				369	// It is not a good idea to vectorize division. We have to scalarize it and
				370	// in the process we will often end up having to spilling regular
				371	// registers. The overhead of division is going to dominate most kernels
				372	// anyways so try hard to prevent vectorization of division - it is
				373	// generally a bad idea. Assume somewhat arbitrarily that we have to be able
				374	// to hide "20 cycles" for each lane.
				375	{ ISD::SDIV, MVT::v16i8, 16*20 },
				376	{ ISD::SDIV, MVT::v8i16, 8*20 },
				377	{ ISD::SDIV, MVT::v4i32, 4*20 },
				378	{ ISD::SDIV, MVT::v2i64, 2*20 },
				379	{ ISD::UDIV, MVT::v16i8, 16*20 },
				380	{ ISD::UDIV, MVT::v8i16, 8*20 },
				381	{ ISD::UDIV, MVT::v4i32, 4*20 },
				382	{ ISD::UDIV, MVT::v2i64, 2*20 },
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	383	};
				384
				385	if (ST->hasSSE2()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	386	int Idx = CostTableLookup(SSE2CostTable, ISD, LT.second);
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	387	if (Idx != -1)
				388	return LT.first * SSE2CostTable[Idx].Cost;
				389	}
				390
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	391	static const CostTblEntry<MVT::SimpleValueType> AVX1CostTable[] = {
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	392	// We don't have to scalarize unsupported ops. We can issue two half-sized
				393	// operations and we only need to extract the upper YMM half.
				394	// Two ops + 1 extract + 1 insert = 4.
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	395	{ ISD::MUL, MVT::v16i16, 4 },
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	396	{ ISD::MUL, MVT::v8i32, 4 },
				397	{ ISD::SUB, MVT::v8i32, 4 },
				398	{ ISD::ADD, MVT::v8i32, 4 },
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	399	{ ISD::SUB, MVT::v4i64, 4 },
				400	{ ISD::ADD, MVT::v4i64, 4 },
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	401	// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
				402	// are lowered as a series of long multiplies(3), shifts(4) and adds(2)
				403	// Because we believe v4i64 to be a legal type, we must also include the
				404	// split factor of two in the cost table. Therefore, the cost here is 18
				405	// instead of 9.
				406	{ ISD::MUL, MVT::v4i64, 18 },
				407	};
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	408
				409	// Look for AVX1 lowering tricks.
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	410	if (ST->hasAVX() && !ST->hasAVX2()) {
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	411	EVT VT = LT.second;
				412
				413	// v16i16 and v8i32 shifts by non-uniform constants are lowered into a
				414	// sequence of extract + two vector multiply + insert.
				415	if (ISD == ISD::SHL && (VT == MVT::v8i32 \|\| VT == MVT::v16i16) &&
				416	Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)
				417	ISD = ISD::MUL;
				418
				419	int Idx = CostTableLookup(AVX1CostTable, ISD, VT);
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	420	if (Idx != -1)
				421	return LT.first * AVX1CostTable[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	422	}
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	423
				424	// Custom lowering of vectors.
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	425	static const CostTblEntry<MVT::SimpleValueType> CustomLowered[] = {
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	426	// A v2i64/v4i64 and multiply is custom lowered as a series of long
				427	// multiplies(3), shifts(4) and adds(2).
				428	{ ISD::MUL, MVT::v2i64, 9 },
				429	{ ISD::MUL, MVT::v4i64, 9 },
				430	};
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	431	int Idx = CostTableLookup(CustomLowered, ISD, LT.second);
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	432	if (Idx != -1)
				433	return LT.first * CustomLowered[Idx].Cost;
				434
				435	// Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle,
				436	// 2x pmuludq, 2x shuffle.
				437	if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
				438	!ST->hasSSE41())
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	439	return LT.first * 6;
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	440
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	441	// Fallback to the default implementation.
Arnold Schwaighofer	b977387	2013-04-04 23:26:21 +0000	[diff] [blame]	442	return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
				443	Op2Info);
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	444	}
				445
				446	unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
				447	Type *SubTp) const {
Karthik Bhat	e03a25d	2014-06-20 04:32:48 +0000	[diff] [blame]	448	// We only estimate the cost of reverse and alternate shuffles.
				449	if (Kind != SK_Reverse && Kind != SK_Alternate)
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	450	return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
				451
Karthik Bhat	e03a25d	2014-06-20 04:32:48 +0000	[diff] [blame]	452	if (Kind == SK_Reverse) {
				453	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
				454	unsigned Cost = 1;
				455	if (LT.second.getSizeInBits() > 128)
				456	Cost = 3; // Extract + insert + copy.
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	457
Karthik Bhat	e03a25d	2014-06-20 04:32:48 +0000	[diff] [blame]	458	// Multiple by the number of parts.
				459	return Cost * LT.first;
				460	}
				461
				462	if (Kind == SK_Alternate) {
Andrea Di Biagio	c8e8bda	2014-07-03 22:24:18 +0000	[diff] [blame]	463	// 64-bit packed float vectors (v2f32) are widened to type v4f32.
				464	// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
Karthik Bhat	e03a25d	2014-06-20 04:32:48 +0000	[diff] [blame]	465	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
				466
Andrea Di Biagio	c8e8bda	2014-07-03 22:24:18 +0000	[diff] [blame]	467	// The backend knows how to generate a single VEX.256 version of
				468	// instruction VPBLENDW if the target supports AVX2.
				469	if (ST->hasAVX2() && LT.second == MVT::v16i16)
				470	return LT.first;
				471
				472	static const CostTblEntry<MVT::SimpleValueType> AVXAltShuffleTbl[] = {
				473	{ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
				474	{ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
				475
				476	{ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps
				477	{ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps
				478
				479	// This shuffle is custom lowered into a sequence of:
				480	// 2x vextractf128 , 2x vpblendw , 1x vinsertf128
				481	{ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},
				482
				483	// This shuffle is custom lowered into a long sequence of:
				484	// 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128
				485	{ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
				486	};
				487
				488	if (ST->hasAVX()) {
				489	int Idx = CostTableLookup(AVXAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
				490	if (Idx != -1)
				491	return LT.first * AVXAltShuffleTbl[Idx].Cost;
				492	}
				493
				494	static const CostTblEntry<MVT::SimpleValueType> SSE41AltShuffleTbl[] = {
				495	// These are lowered into movsd.
				496	{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
				497	{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
				498
				499	// packed float vectors with four elements are lowered into BLENDI dag
				500	// nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
				501	{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
				502	{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
				503
				504	// This shuffle generates a single pshufw.
				505	{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
				506
				507	// There is no instruction that matches a v16i8 alternate shuffle.
				508	// The backend will expand it into the sequence 'pshufb + pshufb + or'.
				509	{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
				510	};
				511
				512	if (ST->hasSSE41()) {
				513	int Idx = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
				514	if (Idx != -1)
				515	return LT.first * SSE41AltShuffleTbl[Idx].Cost;
				516	}
				517
				518	static const CostTblEntry<MVT::SimpleValueType> SSSE3AltShuffleTbl[] = {
				519	{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
				520	{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
				521
				522	// SSE3 doesn't have 'blendps'. The following shuffles are expanded into
				523	// the sequence 'shufps + pshufd'
				524	{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
				525	{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
				526
				527	{ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
				528	{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
				529	};
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	530
Andrea Di Biagio	c8e8bda	2014-07-03 22:24:18 +0000	[diff] [blame]	531	if (ST->hasSSSE3()) {
				532	int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
				533	if (Idx != -1)
				534	return LT.first * SSSE3AltShuffleTbl[Idx].Cost;
				535	}
				536
				537	static const CostTblEntry<MVT::SimpleValueType> SSEAltShuffleTbl[] = {
				538	{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
				539	{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
				540
				541	{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
				542	{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	543
Andrea Di Biagio	c8e8bda	2014-07-03 22:24:18 +0000	[diff] [blame]	544	// This is expanded into a long sequence of four extract + four insert.
				545	{ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
				546
				547	// 8 x (pinsrw + pextrw + and + movb + movzb + or)
				548	{ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
				549	};
				550
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	551	// Fall-back (SSE3 and SSE2).
Andrea Di Biagio	c8e8bda	2014-07-03 22:24:18 +0000	[diff] [blame]	552	int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
				553	if (Idx != -1)
				554	return LT.first * SSEAltShuffleTbl[Idx].Cost;
				555	return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
Karthik Bhat	e03a25d	2014-06-20 04:32:48 +0000	[diff] [blame]	556	}
				557
				558	return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	559	}
				560
				561	unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type Dst, Type Src) const {
				562	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				563	assert(ISD && "Invalid opcode");
				564
Arnold Schwaighofer	f47d2d7	2013-04-08 18:05:48 +0000	[diff] [blame]	565	std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
				566	std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
				567
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	568	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
				569	SSE2ConvTbl[] = {
Arnold Schwaighofer	f47d2d7	2013-04-08 18:05:48 +0000	[diff] [blame]	570	// These are somewhat magic numbers justified by looking at the output of
				571	// Intel's IACA, running some kernels and making sure when we take
				572	// legalization into account the throughput will be overestimated.
				573	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
				574	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
				575	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
				576	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
				577	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
				578	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
				579	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
				580	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
				581	// There are faster sequences for float conversions.
				582	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
Quentin Colombet	360460b	2014-11-11 02:23:47 +0000	[diff] [blame]	583	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
Arnold Schwaighofer	f47d2d7	2013-04-08 18:05:48 +0000	[diff] [blame]	584	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
				585	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
				586	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
				587	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
				588	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
				589	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
				590	};
				591
				592	if (ST->hasSSE2() && !ST->hasAVX()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	593	int Idx =
				594	ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second);
Arnold Schwaighofer	f47d2d7	2013-04-08 18:05:48 +0000	[diff] [blame]	595	if (Idx != -1)
				596	return LTSrc.first * SSE2ConvTbl[Idx].Cost;
				597	}
				598
Elena Demikhovsky	2701247	2014-09-16 07:57:37 +0000	[diff] [blame]	599	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
				600	AVX512ConversionTbl[] = {
				601	{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
				602	{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
				603	{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
				604	{ ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 },
				605
				606	{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 },
				607	{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 },
				608	{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 },
				609	{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
				610	{ ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 },
				611
				612	// v16i1 -> v16i32 - load + broadcast
				613	{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
				614	{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
				615
				616	{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
				617	{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
				618	{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
				619	{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
				620	{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
				621	{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
				622
Elena Demikhovsky	d5e95b5	2014-11-13 11:46:16 +0000	[diff] [blame]	623	{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
				624	{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
				625	{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
				626	{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
				627	{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
				628	{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
				629	{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
Elena Demikhovsky	2701247	2014-09-16 07:57:37 +0000	[diff] [blame]	630	};
				631
				632	if (ST->hasAVX512()) {
				633	int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second,
				634	LTSrc.second);
				635	if (Idx != -1)
				636	return AVX512ConversionTbl[Idx].Cost;
				637	}
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	638	EVT SrcTy = TLI->getValueType(Src);
				639	EVT DstTy = TLI->getValueType(Dst);
				640
Arnold Schwaighofer	c0c7ff4	2013-04-17 20:04:53 +0000	[diff] [blame]	641	// The function getSimpleVT only handles simple value types.
				642	if (!SrcTy.isSimple() \|\| !DstTy.isSimple())
				643	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				644
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	645	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
Tim Northover	f0e2161	2014-02-06 18:18:36 +0000	[diff] [blame]	646	AVX2ConversionTbl[] = {
				647	{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
				648	{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
				649	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
				650	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
				651	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
				652	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
				653	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
				654	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
				655	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
				656	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
				657	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
				658	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
				659	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
				660	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
				661	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
				662	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
				663
				664	{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 },
				665	{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 },
				666	{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },
				667	{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
				668	{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
				669	{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },
Elena Demikhovsky	2701247	2014-09-16 07:57:37 +0000	[diff] [blame]	670
				671	{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
				672	{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
Quentin Colombet	360460b	2014-11-11 02:23:47 +0000	[diff] [blame]	673
				674	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
Tim Northover	f0e2161	2014-02-06 18:18:36 +0000	[diff] [blame]	675	};
				676
				677	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	678	AVXConversionTbl[] = {
Tim Northover	f0e2161	2014-02-06 18:18:36 +0000	[diff] [blame]	679	{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
				680	{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
				681	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
				682	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 },
				683	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 },
				684	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
				685	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
				686	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
				687	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 },
				688	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 },
				689	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
				690	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
				691	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
				692	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
				693	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
				694	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
				695
				696	{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 },
				697	{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 },
				698	{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 },
				699	{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
				700	{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
				701	{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 },
				702	{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 9 },
Benjamin Kramer	52ceb44	2013-04-01 10:23:49 +0000	[diff] [blame]	703
				704	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
				705	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
				706	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
				707	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
				708	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
				709	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
				710	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 },
				711	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
				712	{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
				713	{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 },
				714	{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 },
				715	{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
				716
				717	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
				718	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 },
				719	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
				720	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
				721	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
				722	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 },
				723	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
				724	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
				725	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
				726	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
				727	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
				728	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
Quentin Colombet	85b904d	2014-03-27 22:27:41 +0000	[diff] [blame]	729	// The generic code to compute the scalar overhead is currently broken.
				730	// Workaround this limitation by estimating the scalarization overhead
				731	// here. We have roughly 10 instructions per scalar element.
				732	// Multiply that by the vector width.
				733	// FIXME: remove that when PR19268 is fixed.
Quentin Colombet	3914bf5	2014-03-27 00:52:16 +0000	[diff] [blame]	734	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
				735	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 4*10 },
Benjamin Kramer	52ceb44	2013-04-01 10:23:49 +0000	[diff] [blame]	736
Jim Grosbach	72fbde8	2014-03-27 00:04:11 +0000	[diff] [blame]	737	{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 },
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	738	{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
Adam Nemet	6dafe97	2014-03-30 18:07:13 +0000	[diff] [blame]	739	// This node is expanded into scalarized operations but BasicTTI is overly
				740	// optimistic estimating its cost. It computes 3 per element (one
				741	// vector-extract, one scalar conversion and one vector-insert). The
				742	// problem is that the inserts form a read-modify-write chain so latency
				743	// should be factored in too. Inflating the cost per element by 1.
				744	{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 8*4 },
Adam Nemet	10c4ce2	2014-03-31 21:54:48 +0000	[diff] [blame]	745	{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	746	};
				747
Tim Northover	f0e2161	2014-02-06 18:18:36 +0000	[diff] [blame]	748	if (ST->hasAVX2()) {
				749	int Idx = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
				750	DstTy.getSimpleVT(), SrcTy.getSimpleVT());
				751	if (Idx != -1)
				752	return AVX2ConversionTbl[Idx].Cost;
				753	}
				754
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	755	if (ST->hasAVX()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	756	int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(),
				757	SrcTy.getSimpleVT());
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	758	if (Idx != -1)
				759	return AVXConversionTbl[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	760	}
				761
				762	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				763	}
				764
				765	unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
				766	Type *CondTy) const {
				767	// Legalize the type.
				768	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
				769
				770	MVT MTy = LT.second;
				771
				772	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				773	assert(ISD && "Invalid opcode");
				774
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	775	static const CostTblEntry<MVT::SimpleValueType> SSE42CostTbl[] = {
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	776	{ ISD::SETCC, MVT::v2f64, 1 },
				777	{ ISD::SETCC, MVT::v4f32, 1 },
				778	{ ISD::SETCC, MVT::v2i64, 1 },
				779	{ ISD::SETCC, MVT::v4i32, 1 },
				780	{ ISD::SETCC, MVT::v8i16, 1 },
				781	{ ISD::SETCC, MVT::v16i8, 1 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	782	};
				783
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	784	static const CostTblEntry<MVT::SimpleValueType> AVX1CostTbl[] = {
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	785	{ ISD::SETCC, MVT::v4f64, 1 },
				786	{ ISD::SETCC, MVT::v8f32, 1 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	787	// AVX1 does not support 8-wide integer compare.
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	788	{ ISD::SETCC, MVT::v4i64, 4 },
				789	{ ISD::SETCC, MVT::v8i32, 4 },
				790	{ ISD::SETCC, MVT::v16i16, 4 },
				791	{ ISD::SETCC, MVT::v32i8, 4 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	792	};
				793
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	794	static const CostTblEntry<MVT::SimpleValueType> AVX2CostTbl[] = {
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	795	{ ISD::SETCC, MVT::v4i64, 1 },
				796	{ ISD::SETCC, MVT::v8i32, 1 },
				797	{ ISD::SETCC, MVT::v16i16, 1 },
				798	{ ISD::SETCC, MVT::v32i8, 1 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	799	};
				800
Elena Demikhovsky	2701247	2014-09-16 07:57:37 +0000	[diff] [blame]	801	static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = {
				802	{ ISD::SETCC, MVT::v8i64, 1 },
				803	{ ISD::SETCC, MVT::v16i32, 1 },
				804	{ ISD::SETCC, MVT::v8f64, 1 },
				805	{ ISD::SETCC, MVT::v16f32, 1 },
				806	};
				807
				808	if (ST->hasAVX512()) {
				809	int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy);
				810	if (Idx != -1)
				811	return LT.first * AVX512CostTbl[Idx].Cost;
				812	}
				813
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	814	if (ST->hasAVX2()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	815	int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	816	if (Idx != -1)
				817	return LT.first * AVX2CostTbl[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	818	}
				819
				820	if (ST->hasAVX()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	821	int Idx = CostTableLookup(AVX1CostTbl, ISD, MTy);
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	822	if (Idx != -1)
				823	return LT.first * AVX1CostTbl[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	824	}
				825
				826	if (ST->hasSSE42()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	827	int Idx = CostTableLookup(SSE42CostTbl, ISD, MTy);
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	828	if (Idx != -1)
				829	return LT.first * SSE42CostTbl[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	830	}
				831
				832	return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
				833	}
				834
				835	unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
				836	unsigned Index) const {
				837	assert(Val->isVectorTy() && "This must be a vector type");
				838
				839	if (Index != -1U) {
				840	// Legalize the type.
				841	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
				842
				843	// This type is legalized to a scalar type.
				844	if (!LT.second.isVector())
				845	return 0;
				846
				847	// The type may be split. Normalize the index to the new type.
				848	unsigned Width = LT.second.getVectorNumElements();
				849	Index = Index % Width;
				850
				851	// Floating point scalars are already located in index #0.
				852	if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
				853	return 0;
				854	}
				855
				856	return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
				857	}
				858
Nadav Rotem	f9ecbcb	2013-06-27 17:52:04 +0000	[diff] [blame]	859	unsigned X86TTI::getScalarizationOverhead(Type *Ty, bool Insert,
				860	bool Extract) const {
				861	assert (Ty->isVectorTy() && "Can only scalarize vectors");
				862	unsigned Cost = 0;
				863
				864	for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
				865	if (Insert)
				866	Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
				867	if (Extract)
				868	Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
				869	}
				870
				871	return Cost;
				872	}
				873
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	874	unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				875	unsigned AddressSpace) const {
Alp Toker	f907b89	2013-12-05 05:44:44 +0000	[diff] [blame]	876	// Handle non-power-of-two vectors such as <3 x float>
Nadav Rotem	f9ecbcb	2013-06-27 17:52:04 +0000	[diff] [blame]	877	if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
				878	unsigned NumElem = VTy->getVectorNumElements();
				879
				880	// Handle a few common cases:
				881	// <3 x float>
				882	if (NumElem == 3 && VTy->getScalarSizeInBits() == 32)
				883	// Cost = 64 bit store + extract + 32 bit store.
				884	return 3;
				885
				886	// <3 x double>
				887	if (NumElem == 3 && VTy->getScalarSizeInBits() == 64)
				888	// Cost = 128 bit store + unpack + 64 bit store.
				889	return 3;
				890
Alp Toker	f907b89	2013-12-05 05:44:44 +0000	[diff] [blame]	891	// Assume that all other non-power-of-two numbers are scalarized.
Nadav Rotem	f9ecbcb	2013-06-27 17:52:04 +0000	[diff] [blame]	892	if (!isPowerOf2_32(NumElem)) {
				893	unsigned Cost = TargetTransformInfo::getMemoryOpCost(Opcode,
				894	VTy->getScalarType(),
				895	Alignment,
				896	AddressSpace);
				897	unsigned SplitCost = getScalarizationOverhead(Src,
				898	Opcode == Instruction::Load,
				899	Opcode==Instruction::Store);
				900	return NumElem * Cost + SplitCost;
				901	}
				902	}
				903
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	904	// Legalize the type.
				905	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
				906	assert((Opcode == Instruction::Load \|\| Opcode == Instruction::Store) &&
				907	"Invalid Opcode");
				908
				909	// Each load/store unit costs 1.
				910	unsigned Cost = LT.first * 1;
				911
				912	// On Sandybridge 256bit load/stores are double pumped
				913	// (but not on Haswell).
				914	if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
				915	Cost*=2;
				916
				917	return Cost;
				918	}
Arnold Schwaighofer	6042a26	2013-07-12 19:16:07 +0000	[diff] [blame]	919
				920	unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
				921	// Address computations in vectorized code with non-consecutive addresses will
				922	// likely result in more instructions compared to scalar code where the
				923	// computation can more often be merged into the index mode. The resulting
				924	// extra micro-ops can significantly decrease throughput.
				925	unsigned NumVectorInstToHideOverhead = 10;
				926
				927	if (Ty->isVectorTy() && IsComplex)
				928	return NumVectorInstToHideOverhead;
				929
				930	return TargetTransformInfo::getAddressComputationCost(Ty, IsComplex);
				931	}
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	932
				933	unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
				934	bool IsPairwise) const {
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	935
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	936	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	937
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	938	MVT MTy = LT.second;
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	939
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	940	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				941	assert(ISD && "Invalid opcode");
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	942
				943	// We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
				944	// and make it as the cost.
				945
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	946	static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblPairWise[] = {
				947	{ ISD::FADD, MVT::v2f64, 2 },
				948	{ ISD::FADD, MVT::v4f32, 4 },
				949	{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
				950	{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
				951	{ ISD::ADD, MVT::v8i16, 5 },
				952	};
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	953
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	954	static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblPairWise[] = {
				955	{ ISD::FADD, MVT::v4f32, 4 },
				956	{ ISD::FADD, MVT::v4f64, 5 },
				957	{ ISD::FADD, MVT::v8f32, 7 },
				958	{ ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
				959	{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
				960	{ ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8".
				961	{ ISD::ADD, MVT::v8i16, 5 },
				962	{ ISD::ADD, MVT::v8i32, 5 },
				963	};
				964
				965	static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblNoPairWise[] = {
				966	{ ISD::FADD, MVT::v2f64, 2 },
				967	{ ISD::FADD, MVT::v4f32, 4 },
				968	{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
				969	{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3".
				970	{ ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
				971	};
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	972
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	973	static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblNoPairWise[] = {
				974	{ ISD::FADD, MVT::v4f32, 3 },
				975	{ ISD::FADD, MVT::v4f64, 3 },
				976	{ ISD::FADD, MVT::v8f32, 4 },
				977	{ ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
				978	{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "2.8".
				979	{ ISD::ADD, MVT::v4i64, 3 },
				980	{ ISD::ADD, MVT::v8i16, 4 },
				981	{ ISD::ADD, MVT::v8i32, 5 },
				982	};
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	983
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	984	if (IsPairwise) {
				985	if (ST->hasAVX()) {
				986	int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
				987	if (Idx != -1)
				988	return LT.first * AVX1CostTblPairWise[Idx].Cost;
				989	}
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	990
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	991	if (ST->hasSSE42()) {
				992	int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
				993	if (Idx != -1)
				994	return LT.first * SSE42CostTblPairWise[Idx].Cost;
				995	}
				996	} else {
				997	if (ST->hasAVX()) {
				998	int Idx = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy);
				999	if (Idx != -1)
				1000	return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
				1001	}
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	1002
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	1003	if (ST->hasSSE42()) {
				1004	int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
				1005	if (Idx != -1)
				1006	return LT.first * SSE42CostTblNoPairWise[Idx].Cost;
				1007	}
				1008	}
				1009
				1010	return TargetTransformInfo::getReductionCost(Opcode, ValTy, IsPairwise);
				1011	}
				1012
Juergen Ributzka	b2e4edb	2014-06-10 00:32:29 +0000	[diff] [blame]	1013	/// \brief Calculate the cost of materializing a 64-bit value. This helper
				1014	/// method might only calculate a fraction of a larger immediate. Therefore it
				1015	/// is valid to return a cost of ZERO.
				1016	unsigned X86TTI::getIntImmCost(int64_t Val) const {
				1017	if (Val == 0)
				1018	return TCC_Free;
				1019
				1020	if (isInt<32>(Val))
				1021	return TCC_Basic;
				1022
				1023	return 2 * TCC_Basic;
				1024	}
				1025
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1026	unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
				1027	assert(Ty->isIntegerTy());
				1028
				1029	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				1030	if (BitSize == 0)
				1031	return ~0U;
				1032
Juergen Ributzka	4317617	2014-05-19 21:00:53 +0000	[diff] [blame]	1033	// Never hoist constants larger than 128bit, because this might lead to
				1034	// incorrect code generation or assertions in codegen.
				1035	// Fixme: Create a cost model for types larger than i128 once the codegen
				1036	// issues have been fixed.
				1037	if (BitSize > 128)
				1038	return TCC_Free;
				1039
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1040	if (Imm == 0)
				1041	return TCC_Free;
				1042
Juergen Ributzka	b2e4edb	2014-06-10 00:32:29 +0000	[diff] [blame]	1043	// Sign-extend all constants to a multiple of 64-bit.
				1044	APInt ImmVal = Imm;
				1045	if (BitSize & 0x3f)
				1046	ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
				1047
				1048	// Split the constant into 64-bit chunks and calculate the cost for each
				1049	// chunk.
				1050	unsigned Cost = 0;
				1051	for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
				1052	APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
				1053	int64_t Val = Tmp.getSExtValue();
				1054	Cost += getIntImmCost(Val);
				1055	}
				1056	// We need at least one instruction to materialze the constant.
				1057	return std::max(1U, Cost);
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1058	}
				1059
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1060	unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1061	Type *Ty) const {
				1062	assert(Ty->isIntegerTy());
				1063
				1064	unsigned BitSize = Ty->getPrimitiveSizeInBits();
Juergen Ributzka	4317617	2014-05-19 21:00:53 +0000	[diff] [blame]	1065	// There is no cost model for constants with a bit size of 0. Return TCC_Free
				1066	// here, so that constant hoisting will ignore this constant.
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1067	if (BitSize == 0)
Juergen Ributzka	4317617	2014-05-19 21:00:53 +0000	[diff] [blame]	1068	return TCC_Free;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1069
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1070	unsigned ImmIdx = ~0U;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1071	switch (Opcode) {
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1072	default: return TCC_Free;
				1073	case Instruction::GetElementPtr:
Juergen Ributzka	27435b3	2014-04-02 21:45:36 +0000	[diff] [blame]	1074	// Always hoist the base address of a GetElementPtr. This prevents the
				1075	// creation of new constants for every base constant that gets constant
				1076	// folded with the offset.
Juergen Ributzka	631c491	2014-03-25 18:01:25 +0000	[diff] [blame]	1077	if (Idx == 0)
				1078	return 2 * TCC_Basic;
				1079	return TCC_Free;
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1080	case Instruction::Store:
				1081	ImmIdx = 0;
				1082	break;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1083	case Instruction::Add:
				1084	case Instruction::Sub:
				1085	case Instruction::Mul:
				1086	case Instruction::UDiv:
				1087	case Instruction::SDiv:
				1088	case Instruction::URem:
				1089	case Instruction::SRem:
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1090	case Instruction::And:
				1091	case Instruction::Or:
				1092	case Instruction::Xor:
				1093	case Instruction::ICmp:
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1094	ImmIdx = 1;
				1095	break;
Michael Zolotukhin	1f4a960	2014-04-30 19:17:32 +0000	[diff] [blame]	1096	// Always return TCC_Free for the shift value of a shift instruction.
				1097	case Instruction::Shl:
				1098	case Instruction::LShr:
				1099	case Instruction::AShr:
				1100	if (Idx == 1)
				1101	return TCC_Free;
				1102	break;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1103	case Instruction::Trunc:
				1104	case Instruction::ZExt:
				1105	case Instruction::SExt:
				1106	case Instruction::IntToPtr:
				1107	case Instruction::PtrToInt:
				1108	case Instruction::BitCast:
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1109	case Instruction::PHI:
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1110	case Instruction::Call:
				1111	case Instruction::Select:
				1112	case Instruction::Ret:
				1113	case Instruction::Load:
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1114	break;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1115	}
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1116
Juergen Ributzka	b2e4edb	2014-06-10 00:32:29 +0000	[diff] [blame]	1117	if (Idx == ImmIdx) {
				1118	unsigned NumConstants = (BitSize + 63) / 64;
				1119	unsigned Cost = X86TTI::getIntImmCost(Imm, Ty);
Saleem Abdulrasool	3c890c4	2014-06-12 17:56:18 +0000	[diff] [blame]	1120	return (Cost <= NumConstants * TCC_Basic)
				1121	? static_cast<unsigned>(TCC_Free)
				1122	: Cost;
Juergen Ributzka	b2e4edb	2014-06-10 00:32:29 +0000	[diff] [blame]	1123	}
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1124
				1125	return X86TTI::getIntImmCost(Imm, Ty);
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1126	}
				1127
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1128	unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
				1129	const APInt &Imm, Type *Ty) const {
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1130	assert(Ty->isIntegerTy());
				1131
				1132	unsigned BitSize = Ty->getPrimitiveSizeInBits();
Juergen Ributzka	4317617	2014-05-19 21:00:53 +0000	[diff] [blame]	1133	// There is no cost model for constants with a bit size of 0. Return TCC_Free
				1134	// here, so that constant hoisting will ignore this constant.
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1135	if (BitSize == 0)
Juergen Ributzka	4317617	2014-05-19 21:00:53 +0000	[diff] [blame]	1136	return TCC_Free;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1137
				1138	switch (IID) {
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1139	default: return TCC_Free;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1140	case Intrinsic::sadd_with_overflow:
				1141	case Intrinsic::uadd_with_overflow:
				1142	case Intrinsic::ssub_with_overflow:
				1143	case Intrinsic::usub_with_overflow:
				1144	case Intrinsic::smul_with_overflow:
				1145	case Intrinsic::umul_with_overflow:
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1146	if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1147	return TCC_Free;
Juergen Ributzka	5eef98c	2014-03-25 18:01:23 +0000	[diff] [blame]	1148	break;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1149	case Intrinsic::experimental_stackmap:
Juergen Ributzka	5eef98c	2014-03-25 18:01:23 +0000	[diff] [blame]	1150	if ((Idx < 2) \|\| (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1151	return TCC_Free;
Juergen Ributzka	5eef98c	2014-03-25 18:01:23 +0000	[diff] [blame]	1152	break;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1153	case Intrinsic::experimental_patchpoint_void:
				1154	case Intrinsic::experimental_patchpoint_i64:
Juergen Ributzka	5eef98c	2014-03-25 18:01:23 +0000	[diff] [blame]	1155	if ((Idx < 4) \|\| (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1156	return TCC_Free;
Juergen Ributzka	5eef98c	2014-03-25 18:01:23 +0000	[diff] [blame]	1157	break;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1158	}
Juergen Ributzka	5eef98c	2014-03-25 18:01:23 +0000	[diff] [blame]	1159	return X86TTI::getIntImmCost(Imm, Ty);
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1160	}
Elena Demikhovsky	f1de34b	2014-12-04 09:40:44 +0000	[diff] [blame^]	1161
				1162	bool X86TTI::isLegalPredicatedLoad(Type *DataType, int Consecutive) const {
				1163	int ScalarWidth = DataType->getScalarSizeInBits();
				1164
				1165	// Todo: AVX512 allows gather/scatter, works with strided and random as well
				1166	if ((ScalarWidth < 32) \|\| (Consecutive == 0))
				1167	return false;
				1168	if (ST->hasAVX512() \|\| ST->hasAVX2())
				1169	return true;
				1170	return false;
				1171	}
				1172
				1173	bool X86TTI::isLegalPredicatedStore(Type *DataType, int Consecutive) const {
				1174	return isLegalPredicatedLoad(DataType, Consecutive);
				1175	}
				1176