Blame - llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp - toolchain/llvm-project

blob: 2058dd06b21862d1c831c3890e16f36e5d89f67d [file] [log] [blame]

Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	1	//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	/// \file
				10	/// This file implements a TargetTransformInfo analysis pass specific to the
				11	/// AArch64 target machine. It uses the target's detailed information to provide
				12	/// more precise answers to certain TTI queries, while letting the target
				13	/// independent and default TTI implementations handle the rest.
				14	///
				15	//===----------------------------------------------------------------------===//
				16
				17	#include "AArch64.h"
				18	#include "AArch64TargetMachine.h"
				19	#include "MCTargetDesc/AArch64AddressingModes.h"
				20	#include "llvm/Analysis/TargetTransformInfo.h"
				21	#include "llvm/Support/Debug.h"
				22	#include "llvm/Target/CostTable.h"
				23	#include "llvm/Target/TargetLowering.h"
				24	#include <algorithm>
				25	using namespace llvm;
				26
				27	#define DEBUG_TYPE "aarch64tti"
				28
				29	// Declare the pass initialization routine locally as target-specific passes
				30	// don't have a target-wide initialization entry point, and so we rely on the
				31	// pass constructor initialization.
				32	namespace llvm {
				33	void initializeAArch64TTIPass(PassRegistry &);
				34	}
				35
				36	namespace {
				37
				38	class AArch64TTI final : public ImmutablePass, public TargetTransformInfo {
				39	const AArch64TargetMachine *TM;
				40	const AArch64Subtarget *ST;
				41	const AArch64TargetLowering *TLI;
				42
				43	/// Estimate the overhead of scalarizing an instruction. Insert and Extract
				44	/// are set if the result needs to be inserted and/or extracted from vectors.
				45	unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
				46
				47	public:
				48	AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
				49	llvm_unreachable("This pass cannot be directly constructed");
				50	}
				51
				52	AArch64TTI(const AArch64TargetMachine *TM)
				53	: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
Eric Christopher	d913448	2014-08-04 21:25:23 +0000	[diff] [blame]	54	TLI(TM->getSubtargetImpl()->getTargetLowering()) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	55	initializeAArch64TTIPass(*PassRegistry::getPassRegistry());
				56	}
				57
				58	void initializePass() override { pushTTIStack(this); }
				59
				60	void getAnalysisUsage(AnalysisUsage &AU) const override {
				61	TargetTransformInfo::getAnalysisUsage(AU);
				62	}
				63
				64	/// Pass identification.
				65	static char ID;
				66
				67	/// Provide necessary pointer adjustments for the two base classes.
				68	void getAdjustedAnalysisPointer(const void ID) override {
				69	if (ID == &TargetTransformInfo::ID)
				70	return (TargetTransformInfo *)this;
				71	return this;
				72	}
				73
				74	/// \name Scalar TTI Implementations
				75	/// @{
				76	unsigned getIntImmCost(int64_t Val) const;
				77	unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
				78	unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
				79	Type *Ty) const override;
				80	unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
				81	Type *Ty) const override;
				82	PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
				83
				84	/// @}
				85
				86	/// \name Vector TTI Implementations
				87	/// @{
				88
				89	unsigned getNumberOfRegisters(bool Vector) const override {
				90	if (Vector) {
				91	if (ST->hasNEON())
				92	return 32;
				93	return 0;
				94	}
				95	return 31;
				96	}
				97
				98	unsigned getRegisterBitWidth(bool Vector) const override {
				99	if (Vector) {
				100	if (ST->hasNEON())
				101	return 128;
				102	return 0;
				103	}
				104	return 64;
				105	}
				106
				107	unsigned getMaximumUnrollFactor() const override { return 2; }
				108
				109	unsigned getCastInstrCost(unsigned Opcode, Type Dst, Type Src) const
				110	override;
				111
				112	unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
				113	override;
				114
				115	unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
				116	OperandValueKind Opd1Info = OK_AnyValue,
				117	OperandValueKind Opd2Info = OK_AnyValue) const
				118	override;
				119
				120	unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
				121
				122	unsigned getCmpSelInstrCost(unsigned Opcode, Type ValTy, Type CondTy) const
				123	override;
				124
				125	unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				126	unsigned AddressSpace) const override;
James Molloy	2b8933c	2014-08-05 12:30:34 +0000	[diff] [blame^]	127
				128	unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override;
				129
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	130	/// @}
				131	};
				132
				133	} // end anonymous namespace
				134
				135	INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti",
				136	"AArch64 Target Transform Info", true, true, false)
				137	char AArch64TTI::ID = 0;
				138
				139	ImmutablePass *
				140	llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) {
				141	return new AArch64TTI(TM);
				142	}
				143
				144	/// \brief Calculate the cost of materializing a 64-bit value. This helper
				145	/// method might only calculate a fraction of a larger immediate. Therefore it
				146	/// is valid to return a cost of ZERO.
				147	unsigned AArch64TTI::getIntImmCost(int64_t Val) const {
				148	// Check if the immediate can be encoded within an instruction.
				149	if (Val == 0 \|\| AArch64_AM::isLogicalImmediate(Val, 64))
				150	return 0;
				151
				152	if (Val < 0)
				153	Val = ~Val;
				154
				155	// Calculate how many moves we will need to materialize this constant.
				156	unsigned LZ = countLeadingZeros((uint64_t)Val);
				157	return (64 - LZ + 15) / 16;
				158	}
				159
				160	/// \brief Calculate the cost of materializing the given constant.
				161	unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
				162	assert(Ty->isIntegerTy());
				163
				164	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				165	if (BitSize == 0)
				166	return ~0U;
				167
				168	// Sign-extend all constants to a multiple of 64-bit.
				169	APInt ImmVal = Imm;
				170	if (BitSize & 0x3f)
				171	ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
				172
				173	// Split the constant into 64-bit chunks and calculate the cost for each
				174	// chunk.
				175	unsigned Cost = 0;
				176	for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
				177	APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
				178	int64_t Val = Tmp.getSExtValue();
				179	Cost += getIntImmCost(Val);
				180	}
				181	// We need at least one instruction to materialze the constant.
				182	return std::max(1U, Cost);
				183	}
				184
				185	unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
				186	const APInt &Imm, Type *Ty) const {
				187	assert(Ty->isIntegerTy());
				188
				189	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				190	// There is no cost model for constants with a bit size of 0. Return TCC_Free
				191	// here, so that constant hoisting will ignore this constant.
				192	if (BitSize == 0)
				193	return TCC_Free;
				194
				195	unsigned ImmIdx = ~0U;
				196	switch (Opcode) {
				197	default:
				198	return TCC_Free;
				199	case Instruction::GetElementPtr:
				200	// Always hoist the base address of a GetElementPtr.
				201	if (Idx == 0)
				202	return 2 * TCC_Basic;
				203	return TCC_Free;
				204	case Instruction::Store:
				205	ImmIdx = 0;
				206	break;
				207	case Instruction::Add:
				208	case Instruction::Sub:
				209	case Instruction::Mul:
				210	case Instruction::UDiv:
				211	case Instruction::SDiv:
				212	case Instruction::URem:
				213	case Instruction::SRem:
				214	case Instruction::And:
				215	case Instruction::Or:
				216	case Instruction::Xor:
				217	case Instruction::ICmp:
				218	ImmIdx = 1;
				219	break;
				220	// Always return TCC_Free for the shift value of a shift instruction.
				221	case Instruction::Shl:
				222	case Instruction::LShr:
				223	case Instruction::AShr:
				224	if (Idx == 1)
				225	return TCC_Free;
				226	break;
				227	case Instruction::Trunc:
				228	case Instruction::ZExt:
				229	case Instruction::SExt:
				230	case Instruction::IntToPtr:
				231	case Instruction::PtrToInt:
				232	case Instruction::BitCast:
				233	case Instruction::PHI:
				234	case Instruction::Call:
				235	case Instruction::Select:
				236	case Instruction::Ret:
				237	case Instruction::Load:
				238	break;
				239	}
				240
				241	if (Idx == ImmIdx) {
				242	unsigned NumConstants = (BitSize + 63) / 64;
				243	unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
				244	return (Cost <= NumConstants * TCC_Basic)
				245	? static_cast<unsigned>(TCC_Free) : Cost;
				246	}
				247	return AArch64TTI::getIntImmCost(Imm, Ty);
				248	}
				249
				250	unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
				251	const APInt &Imm, Type *Ty) const {
				252	assert(Ty->isIntegerTy());
				253
				254	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				255	// There is no cost model for constants with a bit size of 0. Return TCC_Free
				256	// here, so that constant hoisting will ignore this constant.
				257	if (BitSize == 0)
				258	return TCC_Free;
				259
				260	switch (IID) {
				261	default:
				262	return TCC_Free;
				263	case Intrinsic::sadd_with_overflow:
				264	case Intrinsic::uadd_with_overflow:
				265	case Intrinsic::ssub_with_overflow:
				266	case Intrinsic::usub_with_overflow:
				267	case Intrinsic::smul_with_overflow:
				268	case Intrinsic::umul_with_overflow:
				269	if (Idx == 1) {
				270	unsigned NumConstants = (BitSize + 63) / 64;
				271	unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
				272	return (Cost <= NumConstants * TCC_Basic)
				273	? static_cast<unsigned>(TCC_Free) : Cost;
				274	}
				275	break;
				276	case Intrinsic::experimental_stackmap:
				277	if ((Idx < 2) \|\| (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
				278	return TCC_Free;
				279	break;
				280	case Intrinsic::experimental_patchpoint_void:
				281	case Intrinsic::experimental_patchpoint_i64:
				282	if ((Idx < 4) \|\| (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
				283	return TCC_Free;
				284	break;
				285	}
				286	return AArch64TTI::getIntImmCost(Imm, Ty);
				287	}
				288
				289	AArch64TTI::PopcntSupportKind
				290	AArch64TTI::getPopcntSupport(unsigned TyWidth) const {
				291	assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
				292	if (TyWidth == 32 \|\| TyWidth == 64)
				293	return PSK_FastHardware;
				294	// TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
				295	return PSK_Software;
				296	}
				297
				298	unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
				299	Type *Src) const {
				300	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				301	assert(ISD && "Invalid opcode");
				302
				303	EVT SrcTy = TLI->getValueType(Src);
				304	EVT DstTy = TLI->getValueType(Dst);
				305
				306	if (!SrcTy.isSimple() \|\| !DstTy.isSimple())
				307	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				308
				309	static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
				310	// LowerVectorINT_TO_FP:
				311	{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	312	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	313	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
				314	{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	315	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	316	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	317
				318	// Complex: to v2f32
				319	{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
				320	{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	321	{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	322	{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
				323	{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	324	{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	325
				326	// Complex: to v4f32
				327	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
				328	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
				329	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
				330	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
				331
				332	// Complex: to v2f64
				333	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
				334	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
				335	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
				336	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
				337	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
				338	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
				339
				340
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	341	// LowerVectorFP_TO_INT
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	342	{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	343	{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
				344	{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	345	{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	346	{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
				347	{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	348
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	349	// Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	350	{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	351	{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
				352	{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	353	{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	354	{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
				355	{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
				356
				357	// Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
				358	{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
				359	{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	360	{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	361	{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
				362
				363	// Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
				364	{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
				365	{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
				366	{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
				367	{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
				368	{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
				369	{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	370	};
				371
				372	int Idx = ConvertCostTableLookup<MVT>(
				373	ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(),
				374	SrcTy.getSimpleVT());
				375	if (Idx != -1)
				376	return ConversionTbl[Idx].Cost;
				377
				378	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				379	}
				380
				381	unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
				382	unsigned Index) const {
				383	assert(Val->isVectorTy() && "This must be a vector type");
				384
				385	if (Index != -1U) {
				386	// Legalize the type.
				387	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
				388
				389	// This type is legalized to a scalar type.
				390	if (!LT.second.isVector())
				391	return 0;
				392
				393	// The type may be split. Normalize the index to the new type.
				394	unsigned Width = LT.second.getVectorNumElements();
				395	Index = Index % Width;
				396
				397	// The element at index zero is already inside the vector.
				398	if (Index == 0)
				399	return 0;
				400	}
				401
				402	// All other insert/extracts cost this much.
				403	return 2;
				404	}
				405
				406	unsigned AArch64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
				407	OperandValueKind Opd1Info,
				408	OperandValueKind Opd2Info) const {
				409	// Legalize the type.
				410	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
				411
				412	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				413
				414	switch (ISD) {
				415	default:
				416	return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info,
				417	Opd2Info);
				418	case ISD::ADD:
				419	case ISD::MUL:
				420	case ISD::XOR:
				421	case ISD::OR:
				422	case ISD::AND:
				423	// These nodes are marked as 'custom' for combining purposes only.
				424	// We know that they are legal. See LowerAdd in ISelLowering.
				425	return 1 * LT.first;
				426	}
				427	}
				428
				429	unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
				430	// Address computations in vectorized code with non-consecutive addresses will
				431	// likely result in more instructions compared to scalar code where the
				432	// computation can more often be merged into the index mode. The resulting
				433	// extra micro-ops can significantly decrease throughput.
				434	unsigned NumVectorInstToHideOverhead = 10;
				435
				436	if (Ty->isVectorTy() && IsComplex)
				437	return NumVectorInstToHideOverhead;
				438
				439	// In many cases the address computation is not merged into the instruction
				440	// addressing mode.
				441	return 1;
				442	}
				443
				444	unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
				445	Type *CondTy) const {
				446
				447	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				448	// We don't lower vector selects well that are wider than the register width.
				449	if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
				450	// We would need this many instructions to hide the scalarization happening.
				451	unsigned AmortizationCost = 20;
				452	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
				453	VectorSelectTbl[] = {
				454	{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
				455	{ ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost },
				456	{ ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
				457	{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
				458	{ ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
				459	{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
				460	};
				461
				462	EVT SelCondTy = TLI->getValueType(CondTy);
				463	EVT SelValTy = TLI->getValueType(ValTy);
				464	if (SelCondTy.isSimple() && SelValTy.isSimple()) {
				465	int Idx =
				466	ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
				467	SelValTy.getSimpleVT());
				468	if (Idx != -1)
				469	return VectorSelectTbl[Idx].Cost;
				470	}
				471	}
				472	return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
				473	}
				474
				475	unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
				476	unsigned Alignment,
				477	unsigned AddressSpace) const {
				478	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
				479
				480	if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
				481	Src->getVectorElementType()->isIntegerTy(64)) {
				482	// Unaligned stores are extremely inefficient. We don't split
				483	// unaligned v2i64 stores because the negative impact that has shown in
				484	// practice on inlined memcpy code.
				485	// We make v2i64 stores expensive so that we will only vectorize if there
				486	// are 6 other instructions getting vectorized.
				487	unsigned AmortizationCost = 6;
				488
				489	return LT.first * 2 * AmortizationCost;
				490	}
				491
				492	if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
				493	Src->getVectorNumElements() < 8) {
				494	// We scalarize the loads/stores because there is not v.4b register and we
				495	// have to promote the elements to v.4h.
				496	unsigned NumVecElts = Src->getVectorNumElements();
				497	unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
				498	// We generate 2 instructions per vector element.
				499	return NumVectorizableInstsToAmortize * NumVecElts * 2;
				500	}
				501
				502	return LT.first;
				503	}
James Molloy	2b8933c	2014-08-05 12:30:34 +0000	[diff] [blame^]	504
				505	unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
				506	unsigned Cost = 0;
				507	for (auto *I : Tys) {
				508	if (!I->isVectorTy())
				509	continue;
				510	if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
				511	Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
				512	getMemoryOpCost(Instruction::Load, I, 128, 0);
				513	}
				514	return Cost;
				515	}