Blame - llvm/lib/Target/X86/X86TargetTransformInfo.cpp - toolchain/llvm-project

blob: 9d7f1238fff0a9ff56e81aa451f273cf0033fc35 [file] [log] [blame]

Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	1	//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	/// \file
				10	/// This file implements a TargetTransformInfo analysis pass specific to the
				11	/// X86 target machine. It uses the target's detailed information to provide
				12	/// more precise answers to certain TTI queries, while letting the target
				13	/// independent and default TTI implementations handle the rest.
				14	///
				15	//===----------------------------------------------------------------------===//
				16
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	17	#include "X86.h"
				18	#include "X86TargetMachine.h"
Chandler Carruth	d3e7355	2013-01-07 03:08:10 +0000	[diff] [blame]	19	#include "llvm/Analysis/TargetTransformInfo.h"
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	20	#include "llvm/IR/IntrinsicInst.h"
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	21	#include "llvm/Support/Debug.h"
Renato Golin	d4c392e	2013-01-24 23:01:00 +0000	[diff] [blame]	22	#include "llvm/Target/CostTable.h"
Chandler Carruth	8a8cd2b	2014-01-07 11:48:04 +0000	[diff] [blame]	23	#include "llvm/Target/TargetLowering.h"
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	24	using namespace llvm;
				25
Chandler Carruth	84e68b2	2014-04-22 02:41:26 +0000	[diff] [blame]	26	#define DEBUG_TYPE "x86tti"
				27
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	28	// Declare the pass initialization routine locally as target-specific passes
Eric Christopher	89f1880	2014-05-22 01:21:44 +0000	[diff] [blame]	29	// don't have a target-wide initialization entry point, and so we rely on the
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	30	// pass constructor initialization.
				31	namespace llvm {
				32	void initializeX86TTIPass(PassRegistry &);
				33	}
				34
				35	namespace {
				36
Craig Topper	77dfe45	2014-03-02 08:08:51 +0000	[diff] [blame]	37	class X86TTI final : public ImmutablePass, public TargetTransformInfo {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	38	const X86Subtarget *ST;
				39	const X86TargetLowering *TLI;
				40
				41	/// Estimate the overhead of scalarizing an instruction. Insert and Extract
				42	/// are set if the result needs to be inserted and/or extracted from vectors.
				43	unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
				44
				45	public:
Craig Topper	062a2ba	2014-04-25 05:30:21 +0000	[diff] [blame]	46	X86TTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	47	llvm_unreachable("This pass cannot be directly constructed");
				48	}
				49
				50	X86TTI(const X86TargetMachine *TM)
Eric Christopher	d913448	2014-08-04 21:25:23 +0000	[diff] [blame]	51	: ImmutablePass(ID), ST(TM->getSubtargetImpl()),
				52	TLI(TM->getSubtargetImpl()->getTargetLowering()) {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	53	initializeX86TTIPass(*PassRegistry::getPassRegistry());
				54	}
				55
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	56	void initializePass() override {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	57	pushTTIStack(this);
				58	}
				59
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	60	void getAnalysisUsage(AnalysisUsage &AU) const override {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	61	TargetTransformInfo::getAnalysisUsage(AU);
				62	}
				63
				64	/// Pass identification.
				65	static char ID;
				66
				67	/// Provide necessary pointer adjustments for the two base classes.
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	68	void getAdjustedAnalysisPointer(const void ID) override {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	69	if (ID == &TargetTransformInfo::ID)
				70	return (TargetTransformInfo*)this;
				71	return this;
				72	}
				73
				74	/// \name Scalar TTI Implementations
				75	/// @{
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	76	PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	77
				78	/// @}
				79
				80	/// \name Vector TTI Implementations
				81	/// @{
				82
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	83	unsigned getNumberOfRegisters(bool Vector) const override;
				84	unsigned getRegisterBitWidth(bool Vector) const override;
Sanjay Patel	b653de1	2014-09-10 17:58:16 +0000	[diff] [blame]	85	unsigned getMaxInterleaveFactor() const override;
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	86	unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
Karthik Bhat	7f33ff7	2014-08-25 04:56:54 +0000	[diff] [blame]	87	OperandValueKind, OperandValueProperties,
				88	OperandValueProperties) const override;
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	89	unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
				90	int Index, Type *SubTp) const override;
				91	unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
				92	Type *Src) const override;
				93	unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
				94	Type *CondTy) const override;
				95	unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
				96	unsigned Index) const override;
				97	unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				98	unsigned AddressSpace) const override;
Elena Demikhovsky	a3232f7	2015-01-25 08:44:46 +0000	[diff] [blame]	99	unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
				100	unsigned Alignment,
				101	unsigned AddressSpace) const override;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	102
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	103	unsigned getAddressComputationCost(Type *PtrTy,
				104	bool IsComplex) const override;
Arnold Schwaighofer	6042a26	2013-07-12 19:16:07 +0000	[diff] [blame]	105
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	106	unsigned getReductionCost(unsigned Opcode, Type *Ty,
				107	bool IsPairwiseForm) const override;
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	108
Juergen Ributzka	b2e4edb	2014-06-10 00:32:29 +0000	[diff] [blame]	109	unsigned getIntImmCost(int64_t) const;
				110
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	111	unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	112
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	113	unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	114	Type *Ty) const override;
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	115	unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Craig Topper	24e685f	2014-03-10 05:29:18 +0000	[diff] [blame]	116	Type *Ty) const override;
Elena Demikhovsky	3fcafa2	2014-12-14 09:43:50 +0000	[diff] [blame]	117	bool isLegalMaskedLoad (Type *DataType, int Consecutive) const override;
				118	bool isLegalMaskedStore(Type *DataType, int Consecutive) const override;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	119
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	120	/// @}
				121	};
				122
				123	} // end anonymous namespace
				124
				125	INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
				126	"X86 Target Transform Info", true, true, false)
				127	char X86TTI::ID = 0;
				128
				129	ImmutablePass *
				130	llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
				131	return new X86TTI(TM);
				132	}
				133
				134
				135	//===----------------------------------------------------------------------===//
				136	//
				137	// X86 cost model.
				138	//
				139	//===----------------------------------------------------------------------===//
				140
Chandler Carruth	50a36cd	2013-01-07 03:16:03 +0000	[diff] [blame]	141	X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	142	assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
				143	// TODO: Currently the __builtin_popcount() implementation using SSE3
				144	// instructions is inefficient. Once the problem is fixed, we should
Craig Topper	0a63e1d	2013-09-08 00:47:31 +0000	[diff] [blame]	145	// call ST->hasSSE3() instead of ST->hasPOPCNT().
				146	return ST->hasPOPCNT() ? PSK_FastHardware : PSK_Software;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	147	}
				148
				149	unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
Nadav Rotem	b1791a7	2013-01-09 22:29:00 +0000	[diff] [blame]	150	if (Vector && !ST->hasSSE1())
				151	return 0;
				152
Adam Nemet	2820a5b	2014-07-09 18:22:33 +0000	[diff] [blame]	153	if (ST->is64Bit()) {
				154	if (Vector && ST->hasAVX512())
				155	return 32;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	156	return 16;
Adam Nemet	2820a5b	2014-07-09 18:22:33 +0000	[diff] [blame]	157	}
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	158	return 8;
				159	}
				160
Nadav Rotem	b1791a7	2013-01-09 22:29:00 +0000	[diff] [blame]	161	unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
				162	if (Vector) {
Adam Nemet	2820a5b	2014-07-09 18:22:33 +0000	[diff] [blame]	163	if (ST->hasAVX512()) return 512;
Nadav Rotem	b1791a7	2013-01-09 22:29:00 +0000	[diff] [blame]	164	if (ST->hasAVX()) return 256;
				165	if (ST->hasSSE1()) return 128;
				166	return 0;
				167	}
				168
				169	if (ST->is64Bit())
				170	return 64;
				171	return 32;
				172
				173	}
				174
Sanjay Patel	b653de1	2014-09-10 17:58:16 +0000	[diff] [blame]	175	unsigned X86TTI::getMaxInterleaveFactor() const {
Nadav Rotem	b696c36	2013-01-09 01:15:42 +0000	[diff] [blame]	176	if (ST->isAtom())
				177	return 1;
				178
				179	// Sandybridge and Haswell have multiple execution ports and pipelined
				180	// vector units.
				181	if (ST->hasAVX())
				182	return 4;
				183
				184	return 2;
				185	}
				186
Karthik Bhat	7f33ff7	2014-08-25 04:56:54 +0000	[diff] [blame]	187	unsigned X86TTI::getArithmeticInstrCost(
				188	unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
				189	OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
				190	OperandValueProperties Opd2PropInfo) const {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	191	// Legalize the type.
				192	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
				193
				194	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				195	assert(ISD && "Invalid opcode");
				196
Karthik Bhat	7f33ff7	2014-08-25 04:56:54 +0000	[diff] [blame]	197	if (ISD == ISD::SDIV &&
				198	Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
				199	Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
				200	// On X86, vector signed division by constants power-of-two are
				201	// normally expanded to the sequence SRA + SRL + ADD + SRA.
				202	// The OperandValue properties many not be same as that of previous
				203	// operation;conservatively assume OP_None.
				204	unsigned Cost =
				205	2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
				206	TargetTransformInfo::OP_None,
				207	TargetTransformInfo::OP_None);
				208	Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
				209	TargetTransformInfo::OP_None,
				210	TargetTransformInfo::OP_None);
				211	Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info,
				212	TargetTransformInfo::OP_None,
				213	TargetTransformInfo::OP_None);
				214
				215	return Cost;
				216	}
				217
Benjamin Kramer	7c37227	2014-04-26 14:53:05 +0000	[diff] [blame]	218	static const CostTblEntry<MVT::SimpleValueType>
				219	AVX2UniformConstCostTable[] = {
				220	{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
				221	{ ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
				222	{ ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
				223	{ ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
				224	};
				225
				226	if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
				227	ST->hasAVX2()) {
				228	int Idx = CostTableLookup(AVX2UniformConstCostTable, ISD, LT.second);
				229	if (Idx != -1)
				230	return LT.first * AVX2UniformConstCostTable[Idx].Cost;
				231	}
				232
Elena Demikhovsky	2701247	2014-09-16 07:57:37 +0000	[diff] [blame]	233	static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = {
				234	{ ISD::SHL, MVT::v16i32, 1 },
				235	{ ISD::SRL, MVT::v16i32, 1 },
				236	{ ISD::SRA, MVT::v16i32, 1 },
				237	{ ISD::SHL, MVT::v8i64, 1 },
				238	{ ISD::SRL, MVT::v8i64, 1 },
				239	{ ISD::SRA, MVT::v8i64, 1 },
				240	};
				241
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	242	static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
Michael Liao	70dd7f9	2013-03-20 22:01:10 +0000	[diff] [blame]	243	// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
				244	// customize them to detect the cases where shift amount is a scalar one.
				245	{ ISD::SHL, MVT::v4i32, 1 },
				246	{ ISD::SRL, MVT::v4i32, 1 },
				247	{ ISD::SRA, MVT::v4i32, 1 },
				248	{ ISD::SHL, MVT::v8i32, 1 },
				249	{ ISD::SRL, MVT::v8i32, 1 },
				250	{ ISD::SRA, MVT::v8i32, 1 },
				251	{ ISD::SHL, MVT::v2i64, 1 },
				252	{ ISD::SRL, MVT::v2i64, 1 },
				253	{ ISD::SHL, MVT::v4i64, 1 },
				254	{ ISD::SRL, MVT::v4i64, 1 },
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	255
				256	{ ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
				257	{ ISD::SHL, MVT::v16i16, 16*10 }, // Scalarized.
				258
				259	{ ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
				260	{ ISD::SRL, MVT::v16i16, 8*10 }, // Scalarized.
				261
				262	{ ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
				263	{ ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized.
				264	{ ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
Arnold Schwaighofer	a04b9ef	2013-06-25 19:14:09 +0000	[diff] [blame]	265
				266	// Vectorizing division is a bad idea. See the SSE2 table for more comments.
				267	{ ISD::SDIV, MVT::v32i8, 32*20 },
				268	{ ISD::SDIV, MVT::v16i16, 16*20 },
				269	{ ISD::SDIV, MVT::v8i32, 8*20 },
				270	{ ISD::SDIV, MVT::v4i64, 4*20 },
				271	{ ISD::UDIV, MVT::v32i8, 32*20 },
				272	{ ISD::UDIV, MVT::v16i16, 16*20 },
				273	{ ISD::UDIV, MVT::v8i32, 8*20 },
				274	{ ISD::UDIV, MVT::v4i64, 4*20 },
Michael Liao	70dd7f9	2013-03-20 22:01:10 +0000	[diff] [blame]	275	};
				276
Elena Demikhovsky	2701247	2014-09-16 07:57:37 +0000	[diff] [blame]	277	if (ST->hasAVX512()) {
				278	int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
				279	if (Idx != -1)
				280	return LT.first * AVX512CostTable[Idx].Cost;
				281	}
Michael Liao	70dd7f9	2013-03-20 22:01:10 +0000	[diff] [blame]	282	// Look for AVX2 lowering tricks.
				283	if (ST->hasAVX2()) {
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	284	if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
				285	(Op2Info == TargetTransformInfo::OK_UniformConstantValue \|\|
				286	Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
				287	// On AVX2, a packed v16i16 shift left by a constant build_vector
				288	// is lowered into a vector multiply (vpmullw).
				289	return LT.first;
				290
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	291	int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
Michael Liao	70dd7f9	2013-03-20 22:01:10 +0000	[diff] [blame]	292	if (Idx != -1)
				293	return LT.first * AVX2CostTable[Idx].Cost;
				294	}
				295
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	296	static const CostTblEntry<MVT::SimpleValueType>
				297	SSE2UniformConstCostTable[] = {
Arnold Schwaighofer	44f902e	2013-04-04 23:26:24 +0000	[diff] [blame]	298	// We don't correctly identify costs of casts because they are marked as
				299	// custom.
				300	// Constant splats are cheaper for the following instructions.
				301	{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
				302	{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
				303	{ ISD::SHL, MVT::v4i32, 1 }, // pslld
				304	{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
				305
				306	{ ISD::SRL, MVT::v16i8, 1 }, // psrlw.
				307	{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
				308	{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
				309	{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
				310
				311	{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
				312	{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
				313	{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
Benjamin Kramer	7c37227	2014-04-26 14:53:05 +0000	[diff] [blame]	314
				315	{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
				316	{ ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
Benjamin Kramer	ce4b3fe	2014-04-27 18:47:54 +0000	[diff] [blame]	317	{ ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence
Benjamin Kramer	7c37227	2014-04-26 14:53:05 +0000	[diff] [blame]	318	{ ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
Arnold Schwaighofer	44f902e	2013-04-04 23:26:24 +0000	[diff] [blame]	319	};
				320
				321	if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
				322	ST->hasSSE2()) {
Benjamin Kramer	ce4b3fe	2014-04-27 18:47:54 +0000	[diff] [blame]	323	// pmuldq sequence.
				324	if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
				325	return LT.first * 15;
				326
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	327	int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second);
Arnold Schwaighofer	44f902e	2013-04-04 23:26:24 +0000	[diff] [blame]	328	if (Idx != -1)
				329	return LT.first * SSE2UniformConstCostTable[Idx].Cost;
				330	}
				331
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	332	if (ISD == ISD::SHL &&
				333	Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
				334	EVT VT = LT.second;
				335	if ((VT == MVT::v8i16 && ST->hasSSE2()) \|\|
				336	(VT == MVT::v4i32 && ST->hasSSE41()))
				337	// Vector shift left by non uniform constant can be lowered
				338	// into vector multiply (pmullw/pmulld).
				339	return LT.first;
				340	if (VT == MVT::v4i32 && ST->hasSSE2())
				341	// A vector shift left by non uniform constant is converted
				342	// into a vector multiply; the new multiply is eventually
				343	// lowered into a sequence of shuffles and 2 x pmuludq.
				344	ISD = ISD::MUL;
				345	}
Arnold Schwaighofer	44f902e	2013-04-04 23:26:24 +0000	[diff] [blame]	346
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	347	static const CostTblEntry<MVT::SimpleValueType> SSE2CostTable[] = {
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	348	// We don't correctly identify costs of casts because they are marked as
				349	// custom.
				350	// For some cases, where the shift amount is a scalar we would be able
				351	// to generate better code. Unfortunately, when this is the case the value
				352	// (the splat) will get hoisted out of the loop, thereby making it invisible
				353	// to ISel. The cost model must return worst case assumptions because it is
				354	// used for vectorization and we don't want to make vectorized code worse
				355	// than scalar code.
				356	{ ISD::SHL, MVT::v16i8, 30 }, // cmpeqb sequence.
				357	{ ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
				358	{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
				359	{ ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	360	{ ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	361
				362	{ ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
				363	{ ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
				364	{ ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
				365	{ ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
				366
				367	{ ISD::SRA, MVT::v16i8, 16*10 }, // Scalarized.
				368	{ ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized.
				369	{ ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
				370	{ ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
Arnold Schwaighofer	a04b9ef	2013-06-25 19:14:09 +0000	[diff] [blame]	371
				372	// It is not a good idea to vectorize division. We have to scalarize it and
				373	// in the process we will often end up having to spilling regular
				374	// registers. The overhead of division is going to dominate most kernels
				375	// anyways so try hard to prevent vectorization of division - it is
				376	// generally a bad idea. Assume somewhat arbitrarily that we have to be able
				377	// to hide "20 cycles" for each lane.
				378	{ ISD::SDIV, MVT::v16i8, 16*20 },
				379	{ ISD::SDIV, MVT::v8i16, 8*20 },
				380	{ ISD::SDIV, MVT::v4i32, 4*20 },
				381	{ ISD::SDIV, MVT::v2i64, 2*20 },
				382	{ ISD::UDIV, MVT::v16i8, 16*20 },
				383	{ ISD::UDIV, MVT::v8i16, 8*20 },
				384	{ ISD::UDIV, MVT::v4i32, 4*20 },
				385	{ ISD::UDIV, MVT::v2i64, 2*20 },
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	386	};
				387
				388	if (ST->hasSSE2()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	389	int Idx = CostTableLookup(SSE2CostTable, ISD, LT.second);
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	390	if (Idx != -1)
				391	return LT.first * SSE2CostTable[Idx].Cost;
				392	}
				393
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	394	static const CostTblEntry<MVT::SimpleValueType> AVX1CostTable[] = {
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	395	// We don't have to scalarize unsupported ops. We can issue two half-sized
				396	// operations and we only need to extract the upper YMM half.
				397	// Two ops + 1 extract + 1 insert = 4.
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	398	{ ISD::MUL, MVT::v16i16, 4 },
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	399	{ ISD::MUL, MVT::v8i32, 4 },
				400	{ ISD::SUB, MVT::v8i32, 4 },
				401	{ ISD::ADD, MVT::v8i32, 4 },
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	402	{ ISD::SUB, MVT::v4i64, 4 },
				403	{ ISD::ADD, MVT::v4i64, 4 },
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	404	// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
				405	// are lowered as a series of long multiplies(3), shifts(4) and adds(2)
				406	// Because we believe v4i64 to be a legal type, we must also include the
				407	// split factor of two in the cost table. Therefore, the cost here is 18
				408	// instead of 9.
				409	{ ISD::MUL, MVT::v4i64, 18 },
				410	};
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	411
				412	// Look for AVX1 lowering tricks.
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	413	if (ST->hasAVX() && !ST->hasAVX2()) {
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	414	EVT VT = LT.second;
				415
				416	// v16i16 and v8i32 shifts by non-uniform constants are lowered into a
				417	// sequence of extract + two vector multiply + insert.
				418	if (ISD == ISD::SHL && (VT == MVT::v8i32 \|\| VT == MVT::v16i16) &&
				419	Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)
				420	ISD = ISD::MUL;
				421
				422	int Idx = CostTableLookup(AVX1CostTable, ISD, VT);
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	423	if (Idx != -1)
				424	return LT.first * AVX1CostTable[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	425	}
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	426
				427	// Custom lowering of vectors.
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	428	static const CostTblEntry<MVT::SimpleValueType> CustomLowered[] = {
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	429	// A v2i64/v4i64 and multiply is custom lowered as a series of long
				430	// multiplies(3), shifts(4) and adds(2).
				431	{ ISD::MUL, MVT::v2i64, 9 },
				432	{ ISD::MUL, MVT::v4i64, 9 },
				433	};
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	434	int Idx = CostTableLookup(CustomLowered, ISD, LT.second);
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	435	if (Idx != -1)
				436	return LT.first * CustomLowered[Idx].Cost;
				437
				438	// Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle,
				439	// 2x pmuludq, 2x shuffle.
				440	if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
				441	!ST->hasSSE41())
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	442	return LT.first * 6;
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	443
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	444	// Fallback to the default implementation.
Arnold Schwaighofer	b977387	2013-04-04 23:26:21 +0000	[diff] [blame]	445	return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
				446	Op2Info);
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	447	}
				448
				449	unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
				450	Type *SubTp) const {
Karthik Bhat	e03a25d	2014-06-20 04:32:48 +0000	[diff] [blame]	451	// We only estimate the cost of reverse and alternate shuffles.
				452	if (Kind != SK_Reverse && Kind != SK_Alternate)
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	453	return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
				454
Karthik Bhat	e03a25d	2014-06-20 04:32:48 +0000	[diff] [blame]	455	if (Kind == SK_Reverse) {
				456	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
				457	unsigned Cost = 1;
				458	if (LT.second.getSizeInBits() > 128)
				459	Cost = 3; // Extract + insert + copy.
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	460
Karthik Bhat	e03a25d	2014-06-20 04:32:48 +0000	[diff] [blame]	461	// Multiple by the number of parts.
				462	return Cost * LT.first;
				463	}
				464
				465	if (Kind == SK_Alternate) {
Andrea Di Biagio	c8e8bda	2014-07-03 22:24:18 +0000	[diff] [blame]	466	// 64-bit packed float vectors (v2f32) are widened to type v4f32.
				467	// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
Karthik Bhat	e03a25d	2014-06-20 04:32:48 +0000	[diff] [blame]	468	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
				469
Andrea Di Biagio	c8e8bda	2014-07-03 22:24:18 +0000	[diff] [blame]	470	// The backend knows how to generate a single VEX.256 version of
				471	// instruction VPBLENDW if the target supports AVX2.
				472	if (ST->hasAVX2() && LT.second == MVT::v16i16)
				473	return LT.first;
				474
				475	static const CostTblEntry<MVT::SimpleValueType> AVXAltShuffleTbl[] = {
				476	{ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
				477	{ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
				478
				479	{ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps
				480	{ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps
				481
				482	// This shuffle is custom lowered into a sequence of:
				483	// 2x vextractf128 , 2x vpblendw , 1x vinsertf128
				484	{ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},
				485
				486	// This shuffle is custom lowered into a long sequence of:
				487	// 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128
				488	{ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
				489	};
				490
				491	if (ST->hasAVX()) {
				492	int Idx = CostTableLookup(AVXAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
				493	if (Idx != -1)
				494	return LT.first * AVXAltShuffleTbl[Idx].Cost;
				495	}
				496
				497	static const CostTblEntry<MVT::SimpleValueType> SSE41AltShuffleTbl[] = {
				498	// These are lowered into movsd.
				499	{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
				500	{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
				501
				502	// packed float vectors with four elements are lowered into BLENDI dag
				503	// nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
				504	{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
				505	{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
				506
				507	// This shuffle generates a single pshufw.
				508	{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
				509
				510	// There is no instruction that matches a v16i8 alternate shuffle.
				511	// The backend will expand it into the sequence 'pshufb + pshufb + or'.
				512	{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
				513	};
				514
				515	if (ST->hasSSE41()) {
				516	int Idx = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
				517	if (Idx != -1)
				518	return LT.first * SSE41AltShuffleTbl[Idx].Cost;
				519	}
				520
				521	static const CostTblEntry<MVT::SimpleValueType> SSSE3AltShuffleTbl[] = {
				522	{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
				523	{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
				524
				525	// SSE3 doesn't have 'blendps'. The following shuffles are expanded into
				526	// the sequence 'shufps + pshufd'
				527	{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
				528	{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
				529
				530	{ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
				531	{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
				532	};
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	533
Andrea Di Biagio	c8e8bda	2014-07-03 22:24:18 +0000	[diff] [blame]	534	if (ST->hasSSSE3()) {
				535	int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
				536	if (Idx != -1)
				537	return LT.first * SSSE3AltShuffleTbl[Idx].Cost;
				538	}
				539
				540	static const CostTblEntry<MVT::SimpleValueType> SSEAltShuffleTbl[] = {
				541	{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
				542	{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
				543
				544	{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
				545	{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	546
Andrea Di Biagio	c8e8bda	2014-07-03 22:24:18 +0000	[diff] [blame]	547	// This is expanded into a long sequence of four extract + four insert.
				548	{ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
				549
				550	// 8 x (pinsrw + pextrw + and + movb + movzb + or)
				551	{ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
				552	};
				553
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	554	// Fall-back (SSE3 and SSE2).
Andrea Di Biagio	c8e8bda	2014-07-03 22:24:18 +0000	[diff] [blame]	555	int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
				556	if (Idx != -1)
				557	return LT.first * SSEAltShuffleTbl[Idx].Cost;
				558	return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
Karthik Bhat	e03a25d	2014-06-20 04:32:48 +0000	[diff] [blame]	559	}
				560
				561	return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	562	}
				563
				564	unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type Dst, Type Src) const {
				565	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				566	assert(ISD && "Invalid opcode");
				567
Arnold Schwaighofer	f47d2d7	2013-04-08 18:05:48 +0000	[diff] [blame]	568	std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
				569	std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
				570
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	571	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
				572	SSE2ConvTbl[] = {
Arnold Schwaighofer	f47d2d7	2013-04-08 18:05:48 +0000	[diff] [blame]	573	// These are somewhat magic numbers justified by looking at the output of
				574	// Intel's IACA, running some kernels and making sure when we take
				575	// legalization into account the throughput will be overestimated.
				576	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
				577	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
				578	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
				579	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
				580	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
				581	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
				582	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
				583	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
				584	// There are faster sequences for float conversions.
				585	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
Quentin Colombet	360460b	2014-11-11 02:23:47 +0000	[diff] [blame]	586	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
Arnold Schwaighofer	f47d2d7	2013-04-08 18:05:48 +0000	[diff] [blame]	587	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
				588	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
				589	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
				590	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
				591	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
				592	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
				593	};
				594
				595	if (ST->hasSSE2() && !ST->hasAVX()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	596	int Idx =
				597	ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second);
Arnold Schwaighofer	f47d2d7	2013-04-08 18:05:48 +0000	[diff] [blame]	598	if (Idx != -1)
				599	return LTSrc.first * SSE2ConvTbl[Idx].Cost;
				600	}
				601
Elena Demikhovsky	2701247	2014-09-16 07:57:37 +0000	[diff] [blame]	602	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
				603	AVX512ConversionTbl[] = {
				604	{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
				605	{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
				606	{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
				607	{ ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 },
				608
				609	{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 },
				610	{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 },
				611	{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 },
				612	{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
				613	{ ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 },
				614
				615	// v16i1 -> v16i32 - load + broadcast
				616	{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
				617	{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
				618
				619	{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
				620	{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
				621	{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
				622	{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
				623	{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
				624	{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
				625
Elena Demikhovsky	d5e95b5	2014-11-13 11:46:16 +0000	[diff] [blame]	626	{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
				627	{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
				628	{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
				629	{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
				630	{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
				631	{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
				632	{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
Elena Demikhovsky	2701247	2014-09-16 07:57:37 +0000	[diff] [blame]	633	};
				634
				635	if (ST->hasAVX512()) {
				636	int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second,
				637	LTSrc.second);
				638	if (Idx != -1)
				639	return AVX512ConversionTbl[Idx].Cost;
				640	}
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	641	EVT SrcTy = TLI->getValueType(Src);
				642	EVT DstTy = TLI->getValueType(Dst);
				643
Arnold Schwaighofer	c0c7ff4	2013-04-17 20:04:53 +0000	[diff] [blame]	644	// The function getSimpleVT only handles simple value types.
				645	if (!SrcTy.isSimple() \|\| !DstTy.isSimple())
				646	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				647
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	648	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
Tim Northover	f0e2161	2014-02-06 18:18:36 +0000	[diff] [blame]	649	AVX2ConversionTbl[] = {
				650	{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
				651	{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
				652	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
				653	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
				654	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
				655	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
				656	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
				657	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
				658	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
				659	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
				660	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
				661	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
				662	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
				663	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
				664	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
				665	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
				666
				667	{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 },
				668	{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 },
				669	{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },
				670	{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
				671	{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
				672	{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },
Elena Demikhovsky	2701247	2014-09-16 07:57:37 +0000	[diff] [blame]	673
				674	{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
				675	{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
Quentin Colombet	360460b	2014-11-11 02:23:47 +0000	[diff] [blame]	676
				677	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
Tim Northover	f0e2161	2014-02-06 18:18:36 +0000	[diff] [blame]	678	};
				679
				680	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	681	AVXConversionTbl[] = {
Tim Northover	f0e2161	2014-02-06 18:18:36 +0000	[diff] [blame]	682	{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
				683	{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
				684	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
				685	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 },
				686	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 },
				687	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
				688	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
				689	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
				690	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 },
				691	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 },
				692	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
				693	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
				694	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
				695	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
				696	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
				697	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
				698
				699	{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 },
				700	{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 },
				701	{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 },
				702	{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
				703	{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
				704	{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 },
				705	{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 9 },
Benjamin Kramer	52ceb44	2013-04-01 10:23:49 +0000	[diff] [blame]	706
				707	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
				708	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
				709	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
				710	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
				711	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
				712	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
				713	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 },
				714	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
				715	{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
				716	{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 },
				717	{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 },
				718	{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
				719
				720	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
				721	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 },
				722	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
				723	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
				724	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
				725	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 },
				726	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
				727	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
				728	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
				729	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
				730	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
				731	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
Quentin Colombet	85b904d	2014-03-27 22:27:41 +0000	[diff] [blame]	732	// The generic code to compute the scalar overhead is currently broken.
				733	// Workaround this limitation by estimating the scalarization overhead
				734	// here. We have roughly 10 instructions per scalar element.
				735	// Multiply that by the vector width.
				736	// FIXME: remove that when PR19268 is fixed.
Quentin Colombet	3914bf5	2014-03-27 00:52:16 +0000	[diff] [blame]	737	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
				738	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 4*10 },
Benjamin Kramer	52ceb44	2013-04-01 10:23:49 +0000	[diff] [blame]	739
Jim Grosbach	72fbde8	2014-03-27 00:04:11 +0000	[diff] [blame]	740	{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 },
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	741	{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
Adam Nemet	6dafe97	2014-03-30 18:07:13 +0000	[diff] [blame]	742	// This node is expanded into scalarized operations but BasicTTI is overly
				743	// optimistic estimating its cost. It computes 3 per element (one
				744	// vector-extract, one scalar conversion and one vector-insert). The
				745	// problem is that the inserts form a read-modify-write chain so latency
				746	// should be factored in too. Inflating the cost per element by 1.
				747	{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 8*4 },
Adam Nemet	10c4ce2	2014-03-31 21:54:48 +0000	[diff] [blame]	748	{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	749	};
				750
Tim Northover	f0e2161	2014-02-06 18:18:36 +0000	[diff] [blame]	751	if (ST->hasAVX2()) {
				752	int Idx = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
				753	DstTy.getSimpleVT(), SrcTy.getSimpleVT());
				754	if (Idx != -1)
				755	return AVX2ConversionTbl[Idx].Cost;
				756	}
				757
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	758	if (ST->hasAVX()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	759	int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(),
				760	SrcTy.getSimpleVT());
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	761	if (Idx != -1)
				762	return AVXConversionTbl[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	763	}
				764
				765	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				766	}
				767
				768	unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
				769	Type *CondTy) const {
				770	// Legalize the type.
				771	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
				772
				773	MVT MTy = LT.second;
				774
				775	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				776	assert(ISD && "Invalid opcode");
				777
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	778	static const CostTblEntry<MVT::SimpleValueType> SSE42CostTbl[] = {
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	779	{ ISD::SETCC, MVT::v2f64, 1 },
				780	{ ISD::SETCC, MVT::v4f32, 1 },
				781	{ ISD::SETCC, MVT::v2i64, 1 },
				782	{ ISD::SETCC, MVT::v4i32, 1 },
				783	{ ISD::SETCC, MVT::v8i16, 1 },
				784	{ ISD::SETCC, MVT::v16i8, 1 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	785	};
				786
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	787	static const CostTblEntry<MVT::SimpleValueType> AVX1CostTbl[] = {
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	788	{ ISD::SETCC, MVT::v4f64, 1 },
				789	{ ISD::SETCC, MVT::v8f32, 1 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	790	// AVX1 does not support 8-wide integer compare.
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	791	{ ISD::SETCC, MVT::v4i64, 4 },
				792	{ ISD::SETCC, MVT::v8i32, 4 },
				793	{ ISD::SETCC, MVT::v16i16, 4 },
				794	{ ISD::SETCC, MVT::v32i8, 4 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	795	};
				796
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	797	static const CostTblEntry<MVT::SimpleValueType> AVX2CostTbl[] = {
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	798	{ ISD::SETCC, MVT::v4i64, 1 },
				799	{ ISD::SETCC, MVT::v8i32, 1 },
				800	{ ISD::SETCC, MVT::v16i16, 1 },
				801	{ ISD::SETCC, MVT::v32i8, 1 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	802	};
				803
Elena Demikhovsky	2701247	2014-09-16 07:57:37 +0000	[diff] [blame]	804	static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = {
				805	{ ISD::SETCC, MVT::v8i64, 1 },
				806	{ ISD::SETCC, MVT::v16i32, 1 },
				807	{ ISD::SETCC, MVT::v8f64, 1 },
				808	{ ISD::SETCC, MVT::v16f32, 1 },
				809	};
				810
				811	if (ST->hasAVX512()) {
				812	int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy);
				813	if (Idx != -1)
				814	return LT.first * AVX512CostTbl[Idx].Cost;
				815	}
				816
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	817	if (ST->hasAVX2()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	818	int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	819	if (Idx != -1)
				820	return LT.first * AVX2CostTbl[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	821	}
				822
				823	if (ST->hasAVX()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	824	int Idx = CostTableLookup(AVX1CostTbl, ISD, MTy);
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	825	if (Idx != -1)
				826	return LT.first * AVX1CostTbl[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	827	}
				828
				829	if (ST->hasSSE42()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	830	int Idx = CostTableLookup(SSE42CostTbl, ISD, MTy);
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	831	if (Idx != -1)
				832	return LT.first * SSE42CostTbl[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	833	}
				834
				835	return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
				836	}
				837
				838	unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
				839	unsigned Index) const {
				840	assert(Val->isVectorTy() && "This must be a vector type");
				841
				842	if (Index != -1U) {
				843	// Legalize the type.
				844	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
				845
				846	// This type is legalized to a scalar type.
				847	if (!LT.second.isVector())
				848	return 0;
				849
				850	// The type may be split. Normalize the index to the new type.
				851	unsigned Width = LT.second.getVectorNumElements();
				852	Index = Index % Width;
				853
				854	// Floating point scalars are already located in index #0.
				855	if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
				856	return 0;
				857	}
				858
				859	return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
				860	}
				861
Nadav Rotem	f9ecbcb	2013-06-27 17:52:04 +0000	[diff] [blame]	862	unsigned X86TTI::getScalarizationOverhead(Type *Ty, bool Insert,
				863	bool Extract) const {
				864	assert (Ty->isVectorTy() && "Can only scalarize vectors");
				865	unsigned Cost = 0;
				866
				867	for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
				868	if (Insert)
				869	Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
				870	if (Extract)
				871	Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
				872	}
				873
				874	return Cost;
				875	}
				876
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	877	unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				878	unsigned AddressSpace) const {
Alp Toker	f907b89	2013-12-05 05:44:44 +0000	[diff] [blame]	879	// Handle non-power-of-two vectors such as <3 x float>
Nadav Rotem	f9ecbcb	2013-06-27 17:52:04 +0000	[diff] [blame]	880	if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
				881	unsigned NumElem = VTy->getVectorNumElements();
				882
				883	// Handle a few common cases:
				884	// <3 x float>
				885	if (NumElem == 3 && VTy->getScalarSizeInBits() == 32)
				886	// Cost = 64 bit store + extract + 32 bit store.
				887	return 3;
				888
				889	// <3 x double>
				890	if (NumElem == 3 && VTy->getScalarSizeInBits() == 64)
				891	// Cost = 128 bit store + unpack + 64 bit store.
				892	return 3;
				893
Alp Toker	f907b89	2013-12-05 05:44:44 +0000	[diff] [blame]	894	// Assume that all other non-power-of-two numbers are scalarized.
Nadav Rotem	f9ecbcb	2013-06-27 17:52:04 +0000	[diff] [blame]	895	if (!isPowerOf2_32(NumElem)) {
				896	unsigned Cost = TargetTransformInfo::getMemoryOpCost(Opcode,
				897	VTy->getScalarType(),
				898	Alignment,
				899	AddressSpace);
				900	unsigned SplitCost = getScalarizationOverhead(Src,
				901	Opcode == Instruction::Load,
				902	Opcode==Instruction::Store);
				903	return NumElem * Cost + SplitCost;
				904	}
				905	}
				906
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	907	// Legalize the type.
				908	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
				909	assert((Opcode == Instruction::Load \|\| Opcode == Instruction::Store) &&
				910	"Invalid Opcode");
				911
				912	// Each load/store unit costs 1.
				913	unsigned Cost = LT.first * 1;
				914
				915	// On Sandybridge 256bit load/stores are double pumped
				916	// (but not on Haswell).
				917	if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
				918	Cost*=2;
				919
				920	return Cost;
				921	}
Arnold Schwaighofer	6042a26	2013-07-12 19:16:07 +0000	[diff] [blame]	922
Elena Demikhovsky	a3232f7	2015-01-25 08:44:46 +0000	[diff] [blame]	923	unsigned X86TTI::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
				924	unsigned Alignment,
				925	unsigned AddressSpace) const {
				926	VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
				927	if (!SrcVTy)
				928	// To calculate scalar take the regular cost, without mask
				929	return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace);
				930
				931	unsigned NumElem = SrcVTy->getVectorNumElements();
				932	VectorType *MaskTy =
				933	VectorType::get(Type::getInt8Ty(getGlobalContext()), NumElem);
				934	if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy, 1)) \|\|
				935	(Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy, 1)) \|\|
				936	!isPowerOf2_32(NumElem)) {
				937	// Scalarization
				938	unsigned MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
				939	unsigned ScalarCompareCost =
				940	getCmpSelInstrCost(Instruction::ICmp,
				941	Type::getInt8Ty(getGlobalContext()), NULL);
				942	unsigned BranchCost = getCFInstrCost(Instruction::Br);
				943	unsigned MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
				944
				945	unsigned ValueSplitCost =
				946	getScalarizationOverhead(SrcVTy, Opcode == Instruction::Load,
				947	Opcode == Instruction::Store);
				948	unsigned MemopCost = NumElem *
				949	TargetTransformInfo::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
				950	Alignment, AddressSpace);
				951	return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
				952	}
				953
				954	// Legalize the type.
				955	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(SrcVTy);
				956	unsigned Cost = 0;
				957	if (LT.second != TLI->getValueType(SrcVTy).getSimpleVT() &&
				958	LT.second.getVectorNumElements() == NumElem)
				959	// Promotion requires expand/truncate for data and a shuffle for mask.
				960	Cost += getShuffleCost(TargetTransformInfo::SK_Alternate, SrcVTy, 0, 0) +
				961	getShuffleCost(TargetTransformInfo::SK_Alternate, MaskTy, 0, 0);
				962
				963	else if (LT.second.getVectorNumElements() > NumElem) {
				964	VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),
				965	LT.second.getVectorNumElements());
				966	// Expanding requires fill mask with zeroes
				967	Cost += getShuffleCost(TargetTransformInfo::SK_InsertSubvector,
				968	NewMaskTy, 0, MaskTy);
				969	}
				970	if (!ST->hasAVX512())
				971	return Cost + LT.first*4; // Each maskmov costs 4
				972
				973	// AVX-512 masked load/store is cheapper
				974	return Cost+LT.first;
				975	}
				976
Arnold Schwaighofer	6042a26	2013-07-12 19:16:07 +0000	[diff] [blame]	977	unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
				978	// Address computations in vectorized code with non-consecutive addresses will
				979	// likely result in more instructions compared to scalar code where the
				980	// computation can more often be merged into the index mode. The resulting
				981	// extra micro-ops can significantly decrease throughput.
				982	unsigned NumVectorInstToHideOverhead = 10;
				983
				984	if (Ty->isVectorTy() && IsComplex)
				985	return NumVectorInstToHideOverhead;
				986
				987	return TargetTransformInfo::getAddressComputationCost(Ty, IsComplex);
				988	}
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	989
				990	unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
				991	bool IsPairwise) const {
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	992
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	993	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	994
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	995	MVT MTy = LT.second;
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	996
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	997	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				998	assert(ISD && "Invalid opcode");
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	999
				1000	// We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
				1001	// and make it as the cost.
				1002
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	1003	static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblPairWise[] = {
				1004	{ ISD::FADD, MVT::v2f64, 2 },
				1005	{ ISD::FADD, MVT::v4f32, 4 },
				1006	{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
				1007	{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
				1008	{ ISD::ADD, MVT::v8i16, 5 },
				1009	};
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	1010
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	1011	static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblPairWise[] = {
				1012	{ ISD::FADD, MVT::v4f32, 4 },
				1013	{ ISD::FADD, MVT::v4f64, 5 },
				1014	{ ISD::FADD, MVT::v8f32, 7 },
				1015	{ ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
				1016	{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
				1017	{ ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8".
				1018	{ ISD::ADD, MVT::v8i16, 5 },
				1019	{ ISD::ADD, MVT::v8i32, 5 },
				1020	};
				1021
				1022	static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblNoPairWise[] = {
				1023	{ ISD::FADD, MVT::v2f64, 2 },
				1024	{ ISD::FADD, MVT::v4f32, 4 },
				1025	{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
				1026	{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3".
				1027	{ ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
				1028	};
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	1029
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	1030	static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblNoPairWise[] = {
				1031	{ ISD::FADD, MVT::v4f32, 3 },
				1032	{ ISD::FADD, MVT::v4f64, 3 },
				1033	{ ISD::FADD, MVT::v8f32, 4 },
				1034	{ ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
				1035	{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "2.8".
				1036	{ ISD::ADD, MVT::v4i64, 3 },
				1037	{ ISD::ADD, MVT::v8i16, 4 },
				1038	{ ISD::ADD, MVT::v8i32, 5 },
				1039	};
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	1040
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	1041	if (IsPairwise) {
				1042	if (ST->hasAVX()) {
				1043	int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
				1044	if (Idx != -1)
				1045	return LT.first * AVX1CostTblPairWise[Idx].Cost;
				1046	}
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	1047
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	1048	if (ST->hasSSE42()) {
				1049	int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
				1050	if (Idx != -1)
				1051	return LT.first * SSE42CostTblPairWise[Idx].Cost;
				1052	}
				1053	} else {
				1054	if (ST->hasAVX()) {
				1055	int Idx = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy);
				1056	if (Idx != -1)
				1057	return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
				1058	}
Michael Liao	5bf9578	2014-12-04 05:20:33 +0000	[diff] [blame]	1059
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	1060	if (ST->hasSSE42()) {
				1061	int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
				1062	if (Idx != -1)
				1063	return LT.first * SSE42CostTblNoPairWise[Idx].Cost;
				1064	}
				1065	}
				1066
				1067	return TargetTransformInfo::getReductionCost(Opcode, ValTy, IsPairwise);
				1068	}
				1069
Juergen Ributzka	b2e4edb	2014-06-10 00:32:29 +0000	[diff] [blame]	1070	/// \brief Calculate the cost of materializing a 64-bit value. This helper
				1071	/// method might only calculate a fraction of a larger immediate. Therefore it
				1072	/// is valid to return a cost of ZERO.
				1073	unsigned X86TTI::getIntImmCost(int64_t Val) const {
				1074	if (Val == 0)
				1075	return TCC_Free;
				1076
				1077	if (isInt<32>(Val))
				1078	return TCC_Basic;
				1079
				1080	return 2 * TCC_Basic;
				1081	}
				1082
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1083	unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
				1084	assert(Ty->isIntegerTy());
				1085
				1086	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				1087	if (BitSize == 0)
				1088	return ~0U;
				1089
Juergen Ributzka	4317617	2014-05-19 21:00:53 +0000	[diff] [blame]	1090	// Never hoist constants larger than 128bit, because this might lead to
				1091	// incorrect code generation or assertions in codegen.
				1092	// Fixme: Create a cost model for types larger than i128 once the codegen
				1093	// issues have been fixed.
				1094	if (BitSize > 128)
				1095	return TCC_Free;
				1096
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1097	if (Imm == 0)
				1098	return TCC_Free;
				1099
Juergen Ributzka	b2e4edb	2014-06-10 00:32:29 +0000	[diff] [blame]	1100	// Sign-extend all constants to a multiple of 64-bit.
				1101	APInt ImmVal = Imm;
				1102	if (BitSize & 0x3f)
				1103	ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
				1104
				1105	// Split the constant into 64-bit chunks and calculate the cost for each
				1106	// chunk.
				1107	unsigned Cost = 0;
				1108	for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
				1109	APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
				1110	int64_t Val = Tmp.getSExtValue();
				1111	Cost += getIntImmCost(Val);
				1112	}
				1113	// We need at least one instruction to materialze the constant.
				1114	return std::max(1U, Cost);
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1115	}
				1116
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1117	unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1118	Type *Ty) const {
				1119	assert(Ty->isIntegerTy());
				1120
				1121	unsigned BitSize = Ty->getPrimitiveSizeInBits();
Juergen Ributzka	4317617	2014-05-19 21:00:53 +0000	[diff] [blame]	1122	// There is no cost model for constants with a bit size of 0. Return TCC_Free
				1123	// here, so that constant hoisting will ignore this constant.
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1124	if (BitSize == 0)
Juergen Ributzka	4317617	2014-05-19 21:00:53 +0000	[diff] [blame]	1125	return TCC_Free;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1126
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1127	unsigned ImmIdx = ~0U;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1128	switch (Opcode) {
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1129	default: return TCC_Free;
				1130	case Instruction::GetElementPtr:
Juergen Ributzka	27435b3	2014-04-02 21:45:36 +0000	[diff] [blame]	1131	// Always hoist the base address of a GetElementPtr. This prevents the
				1132	// creation of new constants for every base constant that gets constant
				1133	// folded with the offset.
Juergen Ributzka	631c491	2014-03-25 18:01:25 +0000	[diff] [blame]	1134	if (Idx == 0)
				1135	return 2 * TCC_Basic;
				1136	return TCC_Free;
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1137	case Instruction::Store:
				1138	ImmIdx = 0;
				1139	break;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1140	case Instruction::Add:
				1141	case Instruction::Sub:
				1142	case Instruction::Mul:
				1143	case Instruction::UDiv:
				1144	case Instruction::SDiv:
				1145	case Instruction::URem:
				1146	case Instruction::SRem:
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1147	case Instruction::And:
				1148	case Instruction::Or:
				1149	case Instruction::Xor:
				1150	case Instruction::ICmp:
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1151	ImmIdx = 1;
				1152	break;
Michael Zolotukhin	1f4a960	2014-04-30 19:17:32 +0000	[diff] [blame]	1153	// Always return TCC_Free for the shift value of a shift instruction.
				1154	case Instruction::Shl:
				1155	case Instruction::LShr:
				1156	case Instruction::AShr:
				1157	if (Idx == 1)
				1158	return TCC_Free;
				1159	break;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1160	case Instruction::Trunc:
				1161	case Instruction::ZExt:
				1162	case Instruction::SExt:
				1163	case Instruction::IntToPtr:
				1164	case Instruction::PtrToInt:
				1165	case Instruction::BitCast:
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1166	case Instruction::PHI:
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1167	case Instruction::Call:
				1168	case Instruction::Select:
				1169	case Instruction::Ret:
				1170	case Instruction::Load:
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1171	break;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1172	}
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1173
Juergen Ributzka	b2e4edb	2014-06-10 00:32:29 +0000	[diff] [blame]	1174	if (Idx == ImmIdx) {
				1175	unsigned NumConstants = (BitSize + 63) / 64;
				1176	unsigned Cost = X86TTI::getIntImmCost(Imm, Ty);
Saleem Abdulrasool	3c890c4	2014-06-12 17:56:18 +0000	[diff] [blame]	1177	return (Cost <= NumConstants * TCC_Basic)
				1178	? static_cast<unsigned>(TCC_Free)
				1179	: Cost;
Juergen Ributzka	b2e4edb	2014-06-10 00:32:29 +0000	[diff] [blame]	1180	}
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1181
				1182	return X86TTI::getIntImmCost(Imm, Ty);
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1183	}
				1184
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1185	unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
				1186	const APInt &Imm, Type *Ty) const {
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1187	assert(Ty->isIntegerTy());
				1188
				1189	unsigned BitSize = Ty->getPrimitiveSizeInBits();
Juergen Ributzka	4317617	2014-05-19 21:00:53 +0000	[diff] [blame]	1190	// There is no cost model for constants with a bit size of 0. Return TCC_Free
				1191	// here, so that constant hoisting will ignore this constant.
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1192	if (BitSize == 0)
Juergen Ributzka	4317617	2014-05-19 21:00:53 +0000	[diff] [blame]	1193	return TCC_Free;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1194
				1195	switch (IID) {
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1196	default: return TCC_Free;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1197	case Intrinsic::sadd_with_overflow:
				1198	case Intrinsic::uadd_with_overflow:
				1199	case Intrinsic::ssub_with_overflow:
				1200	case Intrinsic::usub_with_overflow:
				1201	case Intrinsic::smul_with_overflow:
				1202	case Intrinsic::umul_with_overflow:
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1203	if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1204	return TCC_Free;
Juergen Ributzka	5eef98c	2014-03-25 18:01:23 +0000	[diff] [blame]	1205	break;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1206	case Intrinsic::experimental_stackmap:
Juergen Ributzka	5eef98c	2014-03-25 18:01:23 +0000	[diff] [blame]	1207	if ((Idx < 2) \|\| (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
Juergen Ributzka	f0dff49	2014-03-21 06:04:45 +0000	[diff] [blame]	1208	return TCC_Free;
Juergen Ributzka	5eef98c	2014-03-25 18:01:23 +0000	[diff] [blame]	1209	break;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1210	case Intrinsic::experimental_patchpoint_void:
				1211	case Intrinsic::experimental_patchpoint_i64:
Juergen Ributzka	5eef98c	2014-03-25 18:01:23 +0000	[diff] [blame]	1212	if ((Idx < 4) \|\| (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1213	return TCC_Free;
Juergen Ributzka	5eef98c	2014-03-25 18:01:23 +0000	[diff] [blame]	1214	break;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1215	}
Juergen Ributzka	5eef98c	2014-03-25 18:01:23 +0000	[diff] [blame]	1216	return X86TTI::getIntImmCost(Imm, Ty);
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	1217	}
Elena Demikhovsky	f1de34b	2014-12-04 09:40:44 +0000	[diff] [blame]	1218
Elena Demikhovsky	fb81b93	2014-12-25 07:49:20 +0000	[diff] [blame]	1219	bool X86TTI::isLegalMaskedLoad(Type *DataTy, int Consecutive) const {
				1220	int DataWidth = DataTy->getPrimitiveSizeInBits();
Elena Demikhovsky	f1de34b	2014-12-04 09:40:44 +0000	[diff] [blame]	1221
				1222	// Todo: AVX512 allows gather/scatter, works with strided and random as well
Elena Demikhovsky	fb81b93	2014-12-25 07:49:20 +0000	[diff] [blame]	1223	if ((DataWidth < 32) \|\| (Consecutive == 0))
Elena Demikhovsky	f1de34b	2014-12-04 09:40:44 +0000	[diff] [blame]	1224	return false;
				1225	if (ST->hasAVX512() \|\| ST->hasAVX2())
				1226	return true;
				1227	return false;
				1228	}
				1229
Elena Demikhovsky	3fcafa2	2014-12-14 09:43:50 +0000	[diff] [blame]	1230	bool X86TTI::isLegalMaskedStore(Type *DataType, int Consecutive) const {
				1231	return isLegalMaskedLoad(DataType, Consecutive);
Elena Demikhovsky	f1de34b	2014-12-04 09:40:44 +0000	[diff] [blame]	1232	}
				1233