Blame - llvm/lib/Target/X86/X86TargetTransformInfo.cpp - toolchain/llvm-project

blob: c2456e74adbf47e1635a36f1f6508c6a89491df7 [file] [log] [blame]

Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	1	//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	/// \file
				10	/// This file implements a TargetTransformInfo analysis pass specific to the
				11	/// X86 target machine. It uses the target's detailed information to provide
				12	/// more precise answers to certain TTI queries, while letting the target
				13	/// independent and default TTI implementations handle the rest.
				14	///
				15	//===----------------------------------------------------------------------===//
				16
				17	#define DEBUG_TYPE "x86tti"
				18	#include "X86.h"
				19	#include "X86TargetMachine.h"
Chandler Carruth	d3e7355	2013-01-07 03:08:10 +0000	[diff] [blame]	20	#include "llvm/Analysis/TargetTransformInfo.h"
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	21	#include "llvm/IR/IntrinsicInst.h"
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	22	#include "llvm/Support/Debug.h"
Renato Golin	d4c392e	2013-01-24 23:01:00 +0000	[diff] [blame]	23	#include "llvm/Target/CostTable.h"
Chandler Carruth	8a8cd2b	2014-01-07 11:48:04 +0000	[diff] [blame]	24	#include "llvm/Target/TargetLowering.h"
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	25	using namespace llvm;
				26
				27	// Declare the pass initialization routine locally as target-specific passes
				28	// don't have a target-wide initialization entry point, and so we rely on the
				29	// pass constructor initialization.
				30	namespace llvm {
				31	void initializeX86TTIPass(PassRegistry &);
				32	}
				33
				34	namespace {
				35
Craig Topper	77dfe45	2014-03-02 08:08:51 +0000	[diff] [blame]	36	class X86TTI final : public ImmutablePass, public TargetTransformInfo {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	37	const X86Subtarget *ST;
				38	const X86TargetLowering *TLI;
				39
				40	/// Estimate the overhead of scalarizing an instruction. Insert and Extract
				41	/// are set if the result needs to be inserted and/or extracted from vectors.
				42	unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
				43
				44	public:
Nadav Rotem	02dd93e	2013-06-27 17:54:10 +0000	[diff] [blame]	45	X86TTI() : ImmutablePass(ID), ST(0), TLI(0) {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	46	llvm_unreachable("This pass cannot be directly constructed");
				47	}
				48
				49	X86TTI(const X86TargetMachine *TM)
Juergen Ributzka	3e752e7	2014-01-24 18:22:59 +0000	[diff] [blame]	50	: ImmutablePass(ID), ST(TM->getSubtargetImpl()),
				51	TLI(TM->getTargetLowering()) {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	52	initializeX86TTIPass(*PassRegistry::getPassRegistry());
				53	}
				54
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	55	virtual void initializePass() override {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	56	pushTTIStack(this);
				57	}
				58
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	59	virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	60	TargetTransformInfo::getAnalysisUsage(AU);
				61	}
				62
				63	/// Pass identification.
				64	static char ID;
				65
				66	/// Provide necessary pointer adjustments for the two base classes.
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	67	virtual void getAdjustedAnalysisPointer(const void ID) override {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	68	if (ID == &TargetTransformInfo::ID)
				69	return (TargetTransformInfo*)this;
				70	return this;
				71	}
				72
				73	/// \name Scalar TTI Implementations
				74	/// @{
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	75	virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	76
				77	/// @}
				78
				79	/// \name Vector TTI Implementations
				80	/// @{
				81
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	82	virtual unsigned getNumberOfRegisters(bool Vector) const override;
				83	virtual unsigned getRegisterBitWidth(bool Vector) const override;
				84	virtual unsigned getMaximumUnrollFactor() const override;
Arnold Schwaighofer	b977387	2013-04-04 23:26:21 +0000	[diff] [blame]	85	virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
				86	OperandValueKind,
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	87	OperandValueKind) const override;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	88	virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	89	int Index, Type *SubTp) const override;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	90	virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	91	Type *Src) const override;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	92	virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	93	Type *CondTy) const override;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	94	virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	95	unsigned Index) const override;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	96	virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
				97	unsigned Alignment,
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	98	unsigned AddressSpace) const override;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	99
Juergen Ributzka	3e752e7	2014-01-24 18:22:59 +0000	[diff] [blame]	100	virtual unsigned
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	101	getAddressComputationCost(Type *PtrTy, bool IsComplex) const override;
Arnold Schwaighofer	6042a26	2013-07-12 19:16:07 +0000	[diff] [blame]	102
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	103	virtual unsigned getReductionCost(unsigned Opcode, Type *Ty,
				104	bool IsPairwiseForm) const override;
				105
				106	virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	107
				108	virtual unsigned getIntImmCost(unsigned Opcode, const APInt &Imm,
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	109	Type *Ty) const override;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	110	virtual unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
Craig Topper	7315602	2014-03-02 09:09:27 +0000	[diff] [blame]	111	Type *Ty) const override;
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	112
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	113	/// @}
				114	};
				115
				116	} // end anonymous namespace
				117
				118	INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
				119	"X86 Target Transform Info", true, true, false)
				120	char X86TTI::ID = 0;
				121
				122	ImmutablePass *
				123	llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
				124	return new X86TTI(TM);
				125	}
				126
				127
				128	//===----------------------------------------------------------------------===//
				129	//
				130	// X86 cost model.
				131	//
				132	//===----------------------------------------------------------------------===//
				133
Chandler Carruth	50a36cd	2013-01-07 03:16:03 +0000	[diff] [blame]	134	X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	135	assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
				136	// TODO: Currently the __builtin_popcount() implementation using SSE3
				137	// instructions is inefficient. Once the problem is fixed, we should
Craig Topper	0a63e1d	2013-09-08 00:47:31 +0000	[diff] [blame]	138	// call ST->hasSSE3() instead of ST->hasPOPCNT().
				139	return ST->hasPOPCNT() ? PSK_FastHardware : PSK_Software;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	140	}
				141
				142	unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
Nadav Rotem	b1791a7	2013-01-09 22:29:00 +0000	[diff] [blame]	143	if (Vector && !ST->hasSSE1())
				144	return 0;
				145
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	146	if (ST->is64Bit())
				147	return 16;
				148	return 8;
				149	}
				150
Nadav Rotem	b1791a7	2013-01-09 22:29:00 +0000	[diff] [blame]	151	unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
				152	if (Vector) {
				153	if (ST->hasAVX()) return 256;
				154	if (ST->hasSSE1()) return 128;
				155	return 0;
				156	}
				157
				158	if (ST->is64Bit())
				159	return 64;
				160	return 32;
				161
				162	}
				163
Nadav Rotem	b696c36	2013-01-09 01:15:42 +0000	[diff] [blame]	164	unsigned X86TTI::getMaximumUnrollFactor() const {
				165	if (ST->isAtom())
				166	return 1;
				167
				168	// Sandybridge and Haswell have multiple execution ports and pipelined
				169	// vector units.
				170	if (ST->hasAVX())
				171	return 4;
				172
				173	return 2;
				174	}
				175
Arnold Schwaighofer	b977387	2013-04-04 23:26:21 +0000	[diff] [blame]	176	unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
				177	OperandValueKind Op1Info,
				178	OperandValueKind Op2Info) const {
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	179	// Legalize the type.
				180	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
				181
				182	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				183	assert(ISD && "Invalid opcode");
				184
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	185	static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
Michael Liao	70dd7f9	2013-03-20 22:01:10 +0000	[diff] [blame]	186	// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
				187	// customize them to detect the cases where shift amount is a scalar one.
				188	{ ISD::SHL, MVT::v4i32, 1 },
				189	{ ISD::SRL, MVT::v4i32, 1 },
				190	{ ISD::SRA, MVT::v4i32, 1 },
				191	{ ISD::SHL, MVT::v8i32, 1 },
				192	{ ISD::SRL, MVT::v8i32, 1 },
				193	{ ISD::SRA, MVT::v8i32, 1 },
				194	{ ISD::SHL, MVT::v2i64, 1 },
				195	{ ISD::SRL, MVT::v2i64, 1 },
				196	{ ISD::SHL, MVT::v4i64, 1 },
				197	{ ISD::SRL, MVT::v4i64, 1 },
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	198
				199	{ ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
				200	{ ISD::SHL, MVT::v16i16, 16*10 }, // Scalarized.
				201
				202	{ ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
				203	{ ISD::SRL, MVT::v16i16, 8*10 }, // Scalarized.
				204
				205	{ ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
				206	{ ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized.
				207	{ ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
Arnold Schwaighofer	a04b9ef	2013-06-25 19:14:09 +0000	[diff] [blame]	208
				209	// Vectorizing division is a bad idea. See the SSE2 table for more comments.
				210	{ ISD::SDIV, MVT::v32i8, 32*20 },
				211	{ ISD::SDIV, MVT::v16i16, 16*20 },
				212	{ ISD::SDIV, MVT::v8i32, 8*20 },
				213	{ ISD::SDIV, MVT::v4i64, 4*20 },
				214	{ ISD::UDIV, MVT::v32i8, 32*20 },
				215	{ ISD::UDIV, MVT::v16i16, 16*20 },
				216	{ ISD::UDIV, MVT::v8i32, 8*20 },
				217	{ ISD::UDIV, MVT::v4i64, 4*20 },
Michael Liao	70dd7f9	2013-03-20 22:01:10 +0000	[diff] [blame]	218	};
				219
				220	// Look for AVX2 lowering tricks.
				221	if (ST->hasAVX2()) {
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	222	if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
				223	(Op2Info == TargetTransformInfo::OK_UniformConstantValue \|\|
				224	Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
				225	// On AVX2, a packed v16i16 shift left by a constant build_vector
				226	// is lowered into a vector multiply (vpmullw).
				227	return LT.first;
				228
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	229	int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
Michael Liao	70dd7f9	2013-03-20 22:01:10 +0000	[diff] [blame]	230	if (Idx != -1)
				231	return LT.first * AVX2CostTable[Idx].Cost;
				232	}
				233
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	234	static const CostTblEntry<MVT::SimpleValueType>
				235	SSE2UniformConstCostTable[] = {
Arnold Schwaighofer	44f902e	2013-04-04 23:26:24 +0000	[diff] [blame]	236	// We don't correctly identify costs of casts because they are marked as
				237	// custom.
				238	// Constant splats are cheaper for the following instructions.
				239	{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
				240	{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
				241	{ ISD::SHL, MVT::v4i32, 1 }, // pslld
				242	{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
				243
				244	{ ISD::SRL, MVT::v16i8, 1 }, // psrlw.
				245	{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
				246	{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
				247	{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
				248
				249	{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
				250	{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
				251	{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
				252	};
				253
				254	if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
				255	ST->hasSSE2()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	256	int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second);
Arnold Schwaighofer	44f902e	2013-04-04 23:26:24 +0000	[diff] [blame]	257	if (Idx != -1)
				258	return LT.first * SSE2UniformConstCostTable[Idx].Cost;
				259	}
				260
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	261	if (ISD == ISD::SHL &&
				262	Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
				263	EVT VT = LT.second;
				264	if ((VT == MVT::v8i16 && ST->hasSSE2()) \|\|
				265	(VT == MVT::v4i32 && ST->hasSSE41()))
				266	// Vector shift left by non uniform constant can be lowered
				267	// into vector multiply (pmullw/pmulld).
				268	return LT.first;
				269	if (VT == MVT::v4i32 && ST->hasSSE2())
				270	// A vector shift left by non uniform constant is converted
				271	// into a vector multiply; the new multiply is eventually
				272	// lowered into a sequence of shuffles and 2 x pmuludq.
				273	ISD = ISD::MUL;
				274	}
Arnold Schwaighofer	44f902e	2013-04-04 23:26:24 +0000	[diff] [blame]	275
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	276	static const CostTblEntry<MVT::SimpleValueType> SSE2CostTable[] = {
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	277	// We don't correctly identify costs of casts because they are marked as
				278	// custom.
				279	// For some cases, where the shift amount is a scalar we would be able
				280	// to generate better code. Unfortunately, when this is the case the value
				281	// (the splat) will get hoisted out of the loop, thereby making it invisible
				282	// to ISel. The cost model must return worst case assumptions because it is
				283	// used for vectorization and we don't want to make vectorized code worse
				284	// than scalar code.
				285	{ ISD::SHL, MVT::v16i8, 30 }, // cmpeqb sequence.
				286	{ ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
				287	{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
				288	{ ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	289	{ ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	290
				291	{ ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
				292	{ ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
				293	{ ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
				294	{ ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
				295
				296	{ ISD::SRA, MVT::v16i8, 16*10 }, // Scalarized.
				297	{ ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized.
				298	{ ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
				299	{ ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
Arnold Schwaighofer	a04b9ef	2013-06-25 19:14:09 +0000	[diff] [blame]	300
				301	// It is not a good idea to vectorize division. We have to scalarize it and
				302	// in the process we will often end up having to spilling regular
				303	// registers. The overhead of division is going to dominate most kernels
				304	// anyways so try hard to prevent vectorization of division - it is
				305	// generally a bad idea. Assume somewhat arbitrarily that we have to be able
				306	// to hide "20 cycles" for each lane.
				307	{ ISD::SDIV, MVT::v16i8, 16*20 },
				308	{ ISD::SDIV, MVT::v8i16, 8*20 },
				309	{ ISD::SDIV, MVT::v4i32, 4*20 },
				310	{ ISD::SDIV, MVT::v2i64, 2*20 },
				311	{ ISD::UDIV, MVT::v16i8, 16*20 },
				312	{ ISD::UDIV, MVT::v8i16, 8*20 },
				313	{ ISD::UDIV, MVT::v4i32, 4*20 },
				314	{ ISD::UDIV, MVT::v2i64, 2*20 },
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	315	};
				316
				317	if (ST->hasSSE2()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	318	int Idx = CostTableLookup(SSE2CostTable, ISD, LT.second);
Arnold Schwaighofer	e9b5016	2013-04-03 21:46:05 +0000	[diff] [blame]	319	if (Idx != -1)
				320	return LT.first * SSE2CostTable[Idx].Cost;
				321	}
				322
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	323	static const CostTblEntry<MVT::SimpleValueType> AVX1CostTable[] = {
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	324	// We don't have to scalarize unsupported ops. We can issue two half-sized
				325	// operations and we only need to extract the upper YMM half.
				326	// Two ops + 1 extract + 1 insert = 4.
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	327	{ ISD::MUL, MVT::v16i16, 4 },
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	328	{ ISD::MUL, MVT::v8i32, 4 },
				329	{ ISD::SUB, MVT::v8i32, 4 },
				330	{ ISD::ADD, MVT::v8i32, 4 },
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	331	{ ISD::SUB, MVT::v4i64, 4 },
				332	{ ISD::ADD, MVT::v4i64, 4 },
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	333	// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
				334	// are lowered as a series of long multiplies(3), shifts(4) and adds(2)
				335	// Because we believe v4i64 to be a legal type, we must also include the
				336	// split factor of two in the cost table. Therefore, the cost here is 18
				337	// instead of 9.
				338	{ ISD::MUL, MVT::v4i64, 18 },
				339	};
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	340
				341	// Look for AVX1 lowering tricks.
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	342	if (ST->hasAVX() && !ST->hasAVX2()) {
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	343	EVT VT = LT.second;
				344
				345	// v16i16 and v8i32 shifts by non-uniform constants are lowered into a
				346	// sequence of extract + two vector multiply + insert.
				347	if (ISD == ISD::SHL && (VT == MVT::v8i32 \|\| VT == MVT::v16i16) &&
				348	Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)
				349	ISD = ISD::MUL;
				350
				351	int Idx = CostTableLookup(AVX1CostTable, ISD, VT);
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	352	if (Idx != -1)
				353	return LT.first * AVX1CostTable[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	354	}
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	355
				356	// Custom lowering of vectors.
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	357	static const CostTblEntry<MVT::SimpleValueType> CustomLowered[] = {
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	358	// A v2i64/v4i64 and multiply is custom lowered as a series of long
				359	// multiplies(3), shifts(4) and adds(2).
				360	{ ISD::MUL, MVT::v2i64, 9 },
				361	{ ISD::MUL, MVT::v4i64, 9 },
				362	};
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	363	int Idx = CostTableLookup(CustomLowered, ISD, LT.second);
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	364	if (Idx != -1)
				365	return LT.first * CustomLowered[Idx].Cost;
				366
				367	// Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle,
				368	// 2x pmuludq, 2x shuffle.
				369	if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
				370	!ST->hasSSE41())
Andrea Di Biagio	b7882b3	2014-02-12 23:43:47 +0000	[diff] [blame]	371	return LT.first * 6;
Arnold Schwaighofer	20ef54f	2013-03-02 04:02:52 +0000	[diff] [blame]	372
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	373	// Fallback to the default implementation.
Arnold Schwaighofer	b977387	2013-04-04 23:26:21 +0000	[diff] [blame]	374	return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
				375	Op2Info);
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	376	}
				377
				378	unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
				379	Type *SubTp) const {
				380	// We only estimate the cost of reverse shuffles.
Chandler Carruth	2109f47	2013-01-07 03:20:02 +0000	[diff] [blame]	381	if (Kind != SK_Reverse)
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	382	return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
				383
				384	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
				385	unsigned Cost = 1;
				386	if (LT.second.getSizeInBits() > 128)
				387	Cost = 3; // Extract + insert + copy.
				388
				389	// Multiple by the number of parts.
				390	return Cost * LT.first;
				391	}
				392
				393	unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type Dst, Type Src) const {
				394	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				395	assert(ISD && "Invalid opcode");
				396
Arnold Schwaighofer	f47d2d7	2013-04-08 18:05:48 +0000	[diff] [blame]	397	std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
				398	std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
				399
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	400	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
				401	SSE2ConvTbl[] = {
Arnold Schwaighofer	f47d2d7	2013-04-08 18:05:48 +0000	[diff] [blame]	402	// These are somewhat magic numbers justified by looking at the output of
				403	// Intel's IACA, running some kernels and making sure when we take
				404	// legalization into account the throughput will be overestimated.
				405	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
				406	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
				407	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
				408	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
				409	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
				410	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
				411	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
				412	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
				413	// There are faster sequences for float conversions.
				414	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
				415	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
				416	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
				417	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
				418	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
				419	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
				420	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
				421	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
				422	};
				423
				424	if (ST->hasSSE2() && !ST->hasAVX()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	425	int Idx =
				426	ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second);
Arnold Schwaighofer	f47d2d7	2013-04-08 18:05:48 +0000	[diff] [blame]	427	if (Idx != -1)
				428	return LTSrc.first * SSE2ConvTbl[Idx].Cost;
				429	}
				430
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	431	EVT SrcTy = TLI->getValueType(Src);
				432	EVT DstTy = TLI->getValueType(Dst);
				433
Arnold Schwaighofer	c0c7ff4	2013-04-17 20:04:53 +0000	[diff] [blame]	434	// The function getSimpleVT only handles simple value types.
				435	if (!SrcTy.isSimple() \|\| !DstTy.isSimple())
				436	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				437
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	438	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
Tim Northover	f0e2161	2014-02-06 18:18:36 +0000	[diff] [blame]	439	AVX2ConversionTbl[] = {
				440	{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
				441	{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
				442	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
				443	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
				444	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
				445	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
				446	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
				447	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
				448	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
				449	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
				450	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
				451	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
				452	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
				453	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
				454	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
				455	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
				456
				457	{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 },
				458	{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 },
				459	{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },
				460	{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
				461	{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
				462	{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },
				463	};
				464
				465	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	466	AVXConversionTbl[] = {
Tim Northover	f0e2161	2014-02-06 18:18:36 +0000	[diff] [blame]	467	{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
				468	{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
				469	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
				470	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 },
				471	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 },
				472	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
				473	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
				474	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
				475	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 },
				476	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 },
				477	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
				478	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
				479	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
				480	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
				481	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
				482	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
				483
				484	{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 },
				485	{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 },
				486	{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 },
				487	{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
				488	{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
				489	{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 },
				490	{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 9 },
Benjamin Kramer	52ceb44	2013-04-01 10:23:49 +0000	[diff] [blame]	491
				492	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
				493	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
				494	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
				495	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
				496	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
				497	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
				498	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 },
				499	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
				500	{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
				501	{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 },
				502	{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 },
				503	{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
				504
				505	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
				506	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 },
				507	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
				508	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
				509	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
				510	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 },
				511	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
				512	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
				513	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
				514	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
				515	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
				516	{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
				517
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	518	{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
				519	{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	520	};
				521
Tim Northover	f0e2161	2014-02-06 18:18:36 +0000	[diff] [blame]	522	if (ST->hasAVX2()) {
				523	int Idx = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
				524	DstTy.getSimpleVT(), SrcTy.getSimpleVT());
				525	if (Idx != -1)
				526	return AVX2ConversionTbl[Idx].Cost;
				527	}
				528
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	529	if (ST->hasAVX()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	530	int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(),
				531	SrcTy.getSimpleVT());
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	532	if (Idx != -1)
				533	return AVXConversionTbl[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	534	}
				535
				536	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				537	}
				538
				539	unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
				540	Type *CondTy) const {
				541	// Legalize the type.
				542	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
				543
				544	MVT MTy = LT.second;
				545
				546	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				547	assert(ISD && "Invalid opcode");
				548
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	549	static const CostTblEntry<MVT::SimpleValueType> SSE42CostTbl[] = {
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	550	{ ISD::SETCC, MVT::v2f64, 1 },
				551	{ ISD::SETCC, MVT::v4f32, 1 },
				552	{ ISD::SETCC, MVT::v2i64, 1 },
				553	{ ISD::SETCC, MVT::v4i32, 1 },
				554	{ ISD::SETCC, MVT::v8i16, 1 },
				555	{ ISD::SETCC, MVT::v16i8, 1 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	556	};
				557
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	558	static const CostTblEntry<MVT::SimpleValueType> AVX1CostTbl[] = {
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	559	{ ISD::SETCC, MVT::v4f64, 1 },
				560	{ ISD::SETCC, MVT::v8f32, 1 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	561	// AVX1 does not support 8-wide integer compare.
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	562	{ ISD::SETCC, MVT::v4i64, 4 },
				563	{ ISD::SETCC, MVT::v8i32, 4 },
				564	{ ISD::SETCC, MVT::v16i16, 4 },
				565	{ ISD::SETCC, MVT::v32i8, 4 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	566	};
				567
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	568	static const CostTblEntry<MVT::SimpleValueType> AVX2CostTbl[] = {
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	569	{ ISD::SETCC, MVT::v4i64, 1 },
				570	{ ISD::SETCC, MVT::v8i32, 1 },
				571	{ ISD::SETCC, MVT::v16i16, 1 },
				572	{ ISD::SETCC, MVT::v32i8, 1 },
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	573	};
				574
				575	if (ST->hasAVX2()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	576	int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	577	if (Idx != -1)
				578	return LT.first * AVX2CostTbl[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	579	}
				580
				581	if (ST->hasAVX()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	582	int Idx = CostTableLookup(AVX1CostTbl, ISD, MTy);
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	583	if (Idx != -1)
				584	return LT.first * AVX1CostTbl[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	585	}
				586
				587	if (ST->hasSSE42()) {
Benjamin Kramer	21585fd	2013-08-09 19:33:32 +0000	[diff] [blame]	588	int Idx = CostTableLookup(SSE42CostTbl, ISD, MTy);
Renato Golin	e1fb059	2013-01-20 20:57:20 +0000	[diff] [blame]	589	if (Idx != -1)
				590	return LT.first * SSE42CostTbl[Idx].Cost;
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	591	}
				592
				593	return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
				594	}
				595
				596	unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
				597	unsigned Index) const {
				598	assert(Val->isVectorTy() && "This must be a vector type");
				599
				600	if (Index != -1U) {
				601	// Legalize the type.
				602	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
				603
				604	// This type is legalized to a scalar type.
				605	if (!LT.second.isVector())
				606	return 0;
				607
				608	// The type may be split. Normalize the index to the new type.
				609	unsigned Width = LT.second.getVectorNumElements();
				610	Index = Index % Width;
				611
				612	// Floating point scalars are already located in index #0.
				613	if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
				614	return 0;
				615	}
				616
				617	return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
				618	}
				619
Nadav Rotem	f9ecbcb	2013-06-27 17:52:04 +0000	[diff] [blame]	620	unsigned X86TTI::getScalarizationOverhead(Type *Ty, bool Insert,
				621	bool Extract) const {
				622	assert (Ty->isVectorTy() && "Can only scalarize vectors");
				623	unsigned Cost = 0;
				624
				625	for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
				626	if (Insert)
				627	Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
				628	if (Extract)
				629	Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
				630	}
				631
				632	return Cost;
				633	}
				634
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	635	unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				636	unsigned AddressSpace) const {
Alp Toker	f907b89	2013-12-05 05:44:44 +0000	[diff] [blame]	637	// Handle non-power-of-two vectors such as <3 x float>
Nadav Rotem	f9ecbcb	2013-06-27 17:52:04 +0000	[diff] [blame]	638	if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
				639	unsigned NumElem = VTy->getVectorNumElements();
				640
				641	// Handle a few common cases:
				642	// <3 x float>
				643	if (NumElem == 3 && VTy->getScalarSizeInBits() == 32)
				644	// Cost = 64 bit store + extract + 32 bit store.
				645	return 3;
				646
				647	// <3 x double>
				648	if (NumElem == 3 && VTy->getScalarSizeInBits() == 64)
				649	// Cost = 128 bit store + unpack + 64 bit store.
				650	return 3;
				651
Alp Toker	f907b89	2013-12-05 05:44:44 +0000	[diff] [blame]	652	// Assume that all other non-power-of-two numbers are scalarized.
Nadav Rotem	f9ecbcb	2013-06-27 17:52:04 +0000	[diff] [blame]	653	if (!isPowerOf2_32(NumElem)) {
				654	unsigned Cost = TargetTransformInfo::getMemoryOpCost(Opcode,
				655	VTy->getScalarType(),
				656	Alignment,
				657	AddressSpace);
				658	unsigned SplitCost = getScalarizationOverhead(Src,
				659	Opcode == Instruction::Load,
				660	Opcode==Instruction::Store);
				661	return NumElem * Cost + SplitCost;
				662	}
				663	}
				664
Chandler Carruth	664e354	2013-01-07 01:37:14 +0000	[diff] [blame]	665	// Legalize the type.
				666	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
				667	assert((Opcode == Instruction::Load \|\| Opcode == Instruction::Store) &&
				668	"Invalid Opcode");
				669
				670	// Each load/store unit costs 1.
				671	unsigned Cost = LT.first * 1;
				672
				673	// On Sandybridge 256bit load/stores are double pumped
				674	// (but not on Haswell).
				675	if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
				676	Cost*=2;
				677
				678	return Cost;
				679	}
Arnold Schwaighofer	6042a26	2013-07-12 19:16:07 +0000	[diff] [blame]	680
				681	unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
				682	// Address computations in vectorized code with non-consecutive addresses will
				683	// likely result in more instructions compared to scalar code where the
				684	// computation can more often be merged into the index mode. The resulting
				685	// extra micro-ops can significantly decrease throughput.
				686	unsigned NumVectorInstToHideOverhead = 10;
				687
				688	if (Ty->isVectorTy() && IsComplex)
				689	return NumVectorInstToHideOverhead;
				690
				691	return TargetTransformInfo::getAddressComputationCost(Ty, IsComplex);
				692	}
Yi Jiang	5c343de	2013-09-19 17:48:48 +0000	[diff] [blame]	693
				694	unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
				695	bool IsPairwise) const {
				696
				697	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
				698
				699	MVT MTy = LT.second;
				700
				701	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				702	assert(ISD && "Invalid opcode");
				703
				704	// We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
				705	// and make it as the cost.
				706
				707	static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblPairWise[] = {
				708	{ ISD::FADD, MVT::v2f64, 2 },
				709	{ ISD::FADD, MVT::v4f32, 4 },
				710	{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
				711	{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
				712	{ ISD::ADD, MVT::v8i16, 5 },
				713	};
				714
				715	static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblPairWise[] = {
				716	{ ISD::FADD, MVT::v4f32, 4 },
				717	{ ISD::FADD, MVT::v4f64, 5 },
				718	{ ISD::FADD, MVT::v8f32, 7 },
				719	{ ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
				720	{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
				721	{ ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8".
				722	{ ISD::ADD, MVT::v8i16, 5 },
				723	{ ISD::ADD, MVT::v8i32, 5 },
				724	};
				725
				726	static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblNoPairWise[] = {
				727	{ ISD::FADD, MVT::v2f64, 2 },
				728	{ ISD::FADD, MVT::v4f32, 4 },
				729	{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
				730	{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3".
				731	{ ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
				732	};
				733
				734	static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblNoPairWise[] = {
				735	{ ISD::FADD, MVT::v4f32, 3 },
				736	{ ISD::FADD, MVT::v4f64, 3 },
				737	{ ISD::FADD, MVT::v8f32, 4 },
				738	{ ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
				739	{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "2.8".
				740	{ ISD::ADD, MVT::v4i64, 3 },
				741	{ ISD::ADD, MVT::v8i16, 4 },
				742	{ ISD::ADD, MVT::v8i32, 5 },
				743	};
				744
				745	if (IsPairwise) {
				746	if (ST->hasAVX()) {
				747	int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
				748	if (Idx != -1)
				749	return LT.first * AVX1CostTblPairWise[Idx].Cost;
				750	}
				751
				752	if (ST->hasSSE42()) {
				753	int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
				754	if (Idx != -1)
				755	return LT.first * SSE42CostTblPairWise[Idx].Cost;
				756	}
				757	} else {
				758	if (ST->hasAVX()) {
				759	int Idx = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy);
				760	if (Idx != -1)
				761	return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
				762	}
				763
				764	if (ST->hasSSE42()) {
				765	int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
				766	if (Idx != -1)
				767	return LT.first * SSE42CostTblNoPairWise[Idx].Cost;
				768	}
				769	}
				770
				771	return TargetTransformInfo::getReductionCost(Opcode, ValTy, IsPairwise);
				772	}
				773
Juergen Ributzka	f26beda	2014-01-25 02:02:55 +0000	[diff] [blame]	774	unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
				775	assert(Ty->isIntegerTy());
				776
				777	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				778	if (BitSize == 0)
				779	return ~0U;
				780
				781	if (Imm.getBitWidth() <= 64 &&
				782	(isInt<32>(Imm.getSExtValue()) \|\| isUInt<32>(Imm.getZExtValue())))
				783	return TCC_Basic;
				784	else
				785	return 2 * TCC_Basic;
				786	}
				787
				788	unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm,
				789	Type *Ty) const {
				790	assert(Ty->isIntegerTy());
				791
				792	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				793	if (BitSize == 0)
				794	return ~0U;
				795
				796	switch (Opcode) {
				797	case Instruction::Add:
				798	case Instruction::Sub:
				799	case Instruction::Mul:
				800	case Instruction::UDiv:
				801	case Instruction::SDiv:
				802	case Instruction::URem:
				803	case Instruction::SRem:
				804	case Instruction::Shl:
				805	case Instruction::LShr:
				806	case Instruction::AShr:
				807	case Instruction::And:
				808	case Instruction::Or:
				809	case Instruction::Xor:
				810	case Instruction::ICmp:
				811	if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
				812	return TCC_Free;
				813	else
				814	return X86TTI::getIntImmCost(Imm, Ty);
				815	case Instruction::Trunc:
				816	case Instruction::ZExt:
				817	case Instruction::SExt:
				818	case Instruction::IntToPtr:
				819	case Instruction::PtrToInt:
				820	case Instruction::BitCast:
				821	case Instruction::Call:
				822	case Instruction::Select:
				823	case Instruction::Ret:
				824	case Instruction::Load:
				825	case Instruction::Store:
				826	return X86TTI::getIntImmCost(Imm, Ty);
				827	}
				828	return TargetTransformInfo::getIntImmCost(Opcode, Imm, Ty);
				829	}
				830
				831	unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
				832	Type *Ty) const {
				833	assert(Ty->isIntegerTy());
				834
				835	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				836	if (BitSize == 0)
				837	return ~0U;
				838
				839	switch (IID) {
				840	default: return TargetTransformInfo::getIntImmCost(IID, Imm, Ty);
				841	case Intrinsic::sadd_with_overflow:
				842	case Intrinsic::uadd_with_overflow:
				843	case Intrinsic::ssub_with_overflow:
				844	case Intrinsic::usub_with_overflow:
				845	case Intrinsic::smul_with_overflow:
				846	case Intrinsic::umul_with_overflow:
				847	if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
				848	return TCC_Free;
				849	else
				850	return X86TTI::getIntImmCost(Imm, Ty);
				851	case Intrinsic::experimental_stackmap:
				852	case Intrinsic::experimental_patchpoint_void:
				853	case Intrinsic::experimental_patchpoint_i64:
				854	if (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))
				855	return TCC_Free;
				856	else
				857	return X86TTI::getIntImmCost(Imm, Ty);
				858	}
				859	}