Blame - llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp - toolchain/llvm-project

blob: dbdf199a25cc4fb452adc95eee566f87a052530b [file] [log] [blame]

Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	1	//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	/// \file
				10	/// This file implements a TargetTransformInfo analysis pass specific to the
				11	/// AArch64 target machine. It uses the target's detailed information to provide
				12	/// more precise answers to certain TTI queries, while letting the target
				13	/// independent and default TTI implementations handle the rest.
				14	///
				15	//===----------------------------------------------------------------------===//
				16
				17	#include "AArch64.h"
				18	#include "AArch64TargetMachine.h"
				19	#include "MCTargetDesc/AArch64AddressingModes.h"
				20	#include "llvm/Analysis/TargetTransformInfo.h"
				21	#include "llvm/Support/Debug.h"
				22	#include "llvm/Target/CostTable.h"
				23	#include "llvm/Target/TargetLowering.h"
				24	#include <algorithm>
				25	using namespace llvm;
				26
				27	#define DEBUG_TYPE "aarch64tti"
				28
				29	// Declare the pass initialization routine locally as target-specific passes
				30	// don't have a target-wide initialization entry point, and so we rely on the
				31	// pass constructor initialization.
				32	namespace llvm {
				33	void initializeAArch64TTIPass(PassRegistry &);
				34	}
				35
				36	namespace {
				37
				38	class AArch64TTI final : public ImmutablePass, public TargetTransformInfo {
				39	const AArch64TargetMachine *TM;
				40	const AArch64Subtarget *ST;
				41	const AArch64TargetLowering *TLI;
				42
				43	/// Estimate the overhead of scalarizing an instruction. Insert and Extract
				44	/// are set if the result needs to be inserted and/or extracted from vectors.
				45	unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
				46
				47	public:
				48	AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
				49	llvm_unreachable("This pass cannot be directly constructed");
				50	}
				51
				52	AArch64TTI(const AArch64TargetMachine *TM)
				53	: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
Eric Christopher	d913448	2014-08-04 21:25:23 +0000	[diff] [blame]	54	TLI(TM->getSubtargetImpl()->getTargetLowering()) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	55	initializeAArch64TTIPass(*PassRegistry::getPassRegistry());
				56	}
				57
				58	void initializePass() override { pushTTIStack(this); }
				59
				60	void getAnalysisUsage(AnalysisUsage &AU) const override {
				61	TargetTransformInfo::getAnalysisUsage(AU);
				62	}
				63
				64	/// Pass identification.
				65	static char ID;
				66
				67	/// Provide necessary pointer adjustments for the two base classes.
				68	void getAdjustedAnalysisPointer(const void ID) override {
				69	if (ID == &TargetTransformInfo::ID)
				70	return (TargetTransformInfo *)this;
				71	return this;
				72	}
				73
				74	/// \name Scalar TTI Implementations
				75	/// @{
				76	unsigned getIntImmCost(int64_t Val) const;
				77	unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
				78	unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
				79	Type *Ty) const override;
				80	unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
				81	Type *Ty) const override;
				82	PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
				83
				84	/// @}
				85
				86	/// \name Vector TTI Implementations
				87	/// @{
				88
				89	unsigned getNumberOfRegisters(bool Vector) const override {
				90	if (Vector) {
				91	if (ST->hasNEON())
				92	return 32;
				93	return 0;
				94	}
				95	return 31;
				96	}
				97
				98	unsigned getRegisterBitWidth(bool Vector) const override {
				99	if (Vector) {
				100	if (ST->hasNEON())
				101	return 128;
				102	return 0;
				103	}
				104	return 64;
				105	}
				106
Sanjay Patel	b653de1	2014-09-10 17:58:16 +0000	[diff] [blame]	107	unsigned getMaxInterleaveFactor() const override;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	108
				109	unsigned getCastInstrCost(unsigned Opcode, Type Dst, Type Src) const
				110	override;
				111
				112	unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
				113	override;
				114
Karthik Bhat	7f33ff7	2014-08-25 04:56:54 +0000	[diff] [blame]	115	unsigned getArithmeticInstrCost(
				116	unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
				117	OperandValueKind Opd2Info = OK_AnyValue,
				118	OperandValueProperties Opd1PropInfo = OP_None,
				119	OperandValueProperties Opd2PropInfo = OP_None) const override;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	120
				121	unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
				122
				123	unsigned getCmpSelInstrCost(unsigned Opcode, Type ValTy, Type CondTy) const
				124	override;
				125
				126	unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				127	unsigned AddressSpace) const override;
James Molloy	2b8933c	2014-08-05 12:30:34 +0000	[diff] [blame]	128
				129	unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override;
				130
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	131	/// @}
				132	};
				133
				134	} // end anonymous namespace
				135
				136	INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti",
				137	"AArch64 Target Transform Info", true, true, false)
				138	char AArch64TTI::ID = 0;
				139
				140	ImmutablePass *
				141	llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) {
				142	return new AArch64TTI(TM);
				143	}
				144
				145	/// \brief Calculate the cost of materializing a 64-bit value. This helper
				146	/// method might only calculate a fraction of a larger immediate. Therefore it
				147	/// is valid to return a cost of ZERO.
				148	unsigned AArch64TTI::getIntImmCost(int64_t Val) const {
				149	// Check if the immediate can be encoded within an instruction.
				150	if (Val == 0 \|\| AArch64_AM::isLogicalImmediate(Val, 64))
				151	return 0;
				152
				153	if (Val < 0)
				154	Val = ~Val;
				155
				156	// Calculate how many moves we will need to materialize this constant.
				157	unsigned LZ = countLeadingZeros((uint64_t)Val);
				158	return (64 - LZ + 15) / 16;
				159	}
				160
				161	/// \brief Calculate the cost of materializing the given constant.
				162	unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
				163	assert(Ty->isIntegerTy());
				164
				165	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				166	if (BitSize == 0)
				167	return ~0U;
				168
				169	// Sign-extend all constants to a multiple of 64-bit.
				170	APInt ImmVal = Imm;
				171	if (BitSize & 0x3f)
				172	ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
				173
				174	// Split the constant into 64-bit chunks and calculate the cost for each
				175	// chunk.
				176	unsigned Cost = 0;
				177	for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
				178	APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
				179	int64_t Val = Tmp.getSExtValue();
				180	Cost += getIntImmCost(Val);
				181	}
				182	// We need at least one instruction to materialze the constant.
				183	return std::max(1U, Cost);
				184	}
				185
				186	unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
				187	const APInt &Imm, Type *Ty) const {
				188	assert(Ty->isIntegerTy());
				189
				190	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				191	// There is no cost model for constants with a bit size of 0. Return TCC_Free
				192	// here, so that constant hoisting will ignore this constant.
				193	if (BitSize == 0)
				194	return TCC_Free;
				195
				196	unsigned ImmIdx = ~0U;
				197	switch (Opcode) {
				198	default:
				199	return TCC_Free;
				200	case Instruction::GetElementPtr:
				201	// Always hoist the base address of a GetElementPtr.
				202	if (Idx == 0)
				203	return 2 * TCC_Basic;
				204	return TCC_Free;
				205	case Instruction::Store:
				206	ImmIdx = 0;
				207	break;
				208	case Instruction::Add:
				209	case Instruction::Sub:
				210	case Instruction::Mul:
				211	case Instruction::UDiv:
				212	case Instruction::SDiv:
				213	case Instruction::URem:
				214	case Instruction::SRem:
				215	case Instruction::And:
				216	case Instruction::Or:
				217	case Instruction::Xor:
				218	case Instruction::ICmp:
				219	ImmIdx = 1;
				220	break;
				221	// Always return TCC_Free for the shift value of a shift instruction.
				222	case Instruction::Shl:
				223	case Instruction::LShr:
				224	case Instruction::AShr:
				225	if (Idx == 1)
				226	return TCC_Free;
				227	break;
				228	case Instruction::Trunc:
				229	case Instruction::ZExt:
				230	case Instruction::SExt:
				231	case Instruction::IntToPtr:
				232	case Instruction::PtrToInt:
				233	case Instruction::BitCast:
				234	case Instruction::PHI:
				235	case Instruction::Call:
				236	case Instruction::Select:
				237	case Instruction::Ret:
				238	case Instruction::Load:
				239	break;
				240	}
				241
				242	if (Idx == ImmIdx) {
				243	unsigned NumConstants = (BitSize + 63) / 64;
				244	unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
				245	return (Cost <= NumConstants * TCC_Basic)
				246	? static_cast<unsigned>(TCC_Free) : Cost;
				247	}
				248	return AArch64TTI::getIntImmCost(Imm, Ty);
				249	}
				250
				251	unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
				252	const APInt &Imm, Type *Ty) const {
				253	assert(Ty->isIntegerTy());
				254
				255	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				256	// There is no cost model for constants with a bit size of 0. Return TCC_Free
				257	// here, so that constant hoisting will ignore this constant.
				258	if (BitSize == 0)
				259	return TCC_Free;
				260
				261	switch (IID) {
				262	default:
				263	return TCC_Free;
				264	case Intrinsic::sadd_with_overflow:
				265	case Intrinsic::uadd_with_overflow:
				266	case Intrinsic::ssub_with_overflow:
				267	case Intrinsic::usub_with_overflow:
				268	case Intrinsic::smul_with_overflow:
				269	case Intrinsic::umul_with_overflow:
				270	if (Idx == 1) {
				271	unsigned NumConstants = (BitSize + 63) / 64;
				272	unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
				273	return (Cost <= NumConstants * TCC_Basic)
				274	? static_cast<unsigned>(TCC_Free) : Cost;
				275	}
				276	break;
				277	case Intrinsic::experimental_stackmap:
				278	if ((Idx < 2) \|\| (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
				279	return TCC_Free;
				280	break;
				281	case Intrinsic::experimental_patchpoint_void:
				282	case Intrinsic::experimental_patchpoint_i64:
				283	if ((Idx < 4) \|\| (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
				284	return TCC_Free;
				285	break;
				286	}
				287	return AArch64TTI::getIntImmCost(Imm, Ty);
				288	}
				289
				290	AArch64TTI::PopcntSupportKind
				291	AArch64TTI::getPopcntSupport(unsigned TyWidth) const {
				292	assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
				293	if (TyWidth == 32 \|\| TyWidth == 64)
				294	return PSK_FastHardware;
				295	// TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
				296	return PSK_Software;
				297	}
				298
				299	unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
				300	Type *Src) const {
				301	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				302	assert(ISD && "Invalid opcode");
				303
				304	EVT SrcTy = TLI->getValueType(Src);
				305	EVT DstTy = TLI->getValueType(Dst);
				306
				307	if (!SrcTy.isSimple() \|\| !DstTy.isSimple())
				308	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				309
				310	static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
				311	// LowerVectorINT_TO_FP:
				312	{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	313	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	314	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
				315	{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	316	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	317	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	318
				319	// Complex: to v2f32
				320	{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
				321	{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	322	{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	323	{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
				324	{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	325	{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	326
				327	// Complex: to v4f32
				328	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
				329	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
				330	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
				331	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
				332
				333	// Complex: to v2f64
				334	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
				335	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
				336	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
				337	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
				338	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
				339	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
				340
				341
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	342	// LowerVectorFP_TO_INT
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	343	{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	344	{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
				345	{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	346	{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	347	{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
				348	{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	349
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	350	// Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	351	{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	352	{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
				353	{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	354	{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	355	{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
				356	{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
				357
				358	// Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
				359	{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
				360	{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	361	{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	362	{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
				363
				364	// Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
				365	{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
				366	{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
				367	{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
				368	{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
				369	{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
				370	{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	371	};
				372
				373	int Idx = ConvertCostTableLookup<MVT>(
				374	ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(),
				375	SrcTy.getSimpleVT());
				376	if (Idx != -1)
				377	return ConversionTbl[Idx].Cost;
				378
				379	return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
				380	}
				381
				382	unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
				383	unsigned Index) const {
				384	assert(Val->isVectorTy() && "This must be a vector type");
				385
				386	if (Index != -1U) {
				387	// Legalize the type.
				388	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
				389
				390	// This type is legalized to a scalar type.
				391	if (!LT.second.isVector())
				392	return 0;
				393
				394	// The type may be split. Normalize the index to the new type.
				395	unsigned Width = LT.second.getVectorNumElements();
				396	Index = Index % Width;
				397
				398	// The element at index zero is already inside the vector.
				399	if (Index == 0)
				400	return 0;
				401	}
				402
				403	// All other insert/extracts cost this much.
				404	return 2;
				405	}
				406
Karthik Bhat	7f33ff7	2014-08-25 04:56:54 +0000	[diff] [blame]	407	unsigned AArch64TTI::getArithmeticInstrCost(
				408	unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
				409	OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
				410	OperandValueProperties Opd2PropInfo) const {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	411	// Legalize the type.
				412	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
				413
				414	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				415
Chad Rosier	70d54ac	2014-09-29 13:59:31 +0000	[diff] [blame^]	416	if (ISD == ISD::SDIV &&
				417	Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
				418	Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
				419	// On AArch64, scalar signed division by constants power-of-two are
				420	// normally expanded to the sequence ADD + CMP + SELECT + SRA.
				421	// The OperandValue properties many not be same as that of previous
				422	// operation; conservatively assume OP_None.
				423	unsigned Cost =
				424	getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
				425	TargetTransformInfo::OP_None,
				426	TargetTransformInfo::OP_None);
				427	Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
				428	TargetTransformInfo::OP_None,
				429	TargetTransformInfo::OP_None);
				430	Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
				431	TargetTransformInfo::OP_None,
				432	TargetTransformInfo::OP_None);
				433	Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
				434	TargetTransformInfo::OP_None,
				435	TargetTransformInfo::OP_None);
				436	return Cost;
				437	}
				438
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	439	switch (ISD) {
				440	default:
Karthik Bhat	7f33ff7	2014-08-25 04:56:54 +0000	[diff] [blame]	441	return TargetTransformInfo::getArithmeticInstrCost(
				442	Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	443	case ISD::ADD:
				444	case ISD::MUL:
				445	case ISD::XOR:
				446	case ISD::OR:
				447	case ISD::AND:
				448	// These nodes are marked as 'custom' for combining purposes only.
				449	// We know that they are legal. See LowerAdd in ISelLowering.
				450	return 1 * LT.first;
				451	}
				452	}
				453
				454	unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
				455	// Address computations in vectorized code with non-consecutive addresses will
				456	// likely result in more instructions compared to scalar code where the
				457	// computation can more often be merged into the index mode. The resulting
				458	// extra micro-ops can significantly decrease throughput.
				459	unsigned NumVectorInstToHideOverhead = 10;
				460
				461	if (Ty->isVectorTy() && IsComplex)
				462	return NumVectorInstToHideOverhead;
				463
				464	// In many cases the address computation is not merged into the instruction
				465	// addressing mode.
				466	return 1;
				467	}
				468
				469	unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
				470	Type *CondTy) const {
				471
				472	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				473	// We don't lower vector selects well that are wider than the register width.
				474	if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
				475	// We would need this many instructions to hide the scalarization happening.
				476	unsigned AmortizationCost = 20;
				477	static const TypeConversionCostTblEntry<MVT::SimpleValueType>
				478	VectorSelectTbl[] = {
				479	{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
				480	{ ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost },
				481	{ ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
				482	{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
				483	{ ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
				484	{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
				485	};
				486
				487	EVT SelCondTy = TLI->getValueType(CondTy);
				488	EVT SelValTy = TLI->getValueType(ValTy);
				489	if (SelCondTy.isSimple() && SelValTy.isSimple()) {
				490	int Idx =
				491	ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
				492	SelValTy.getSimpleVT());
				493	if (Idx != -1)
				494	return VectorSelectTbl[Idx].Cost;
				495	}
				496	}
				497	return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
				498	}
				499
				500	unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
				501	unsigned Alignment,
				502	unsigned AddressSpace) const {
				503	std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
				504
				505	if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
				506	Src->getVectorElementType()->isIntegerTy(64)) {
				507	// Unaligned stores are extremely inefficient. We don't split
				508	// unaligned v2i64 stores because the negative impact that has shown in
				509	// practice on inlined memcpy code.
				510	// We make v2i64 stores expensive so that we will only vectorize if there
				511	// are 6 other instructions getting vectorized.
				512	unsigned AmortizationCost = 6;
				513
				514	return LT.first * 2 * AmortizationCost;
				515	}
				516
				517	if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
				518	Src->getVectorNumElements() < 8) {
				519	// We scalarize the loads/stores because there is not v.4b register and we
				520	// have to promote the elements to v.4h.
				521	unsigned NumVecElts = Src->getVectorNumElements();
				522	unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
				523	// We generate 2 instructions per vector element.
				524	return NumVectorizableInstsToAmortize * NumVecElts * 2;
				525	}
				526
				527	return LT.first;
				528	}
James Molloy	2b8933c	2014-08-05 12:30:34 +0000	[diff] [blame]	529
				530	unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
				531	unsigned Cost = 0;
				532	for (auto *I : Tys) {
				533	if (!I->isVectorTy())
				534	continue;
				535	if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
				536	Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
				537	getMemoryOpCost(Instruction::Load, I, 128, 0);
				538	}
				539	return Cost;
				540	}
James Molloy	a88896b	2014-08-21 00:02:51 +0000	[diff] [blame]	541
Sanjay Patel	b653de1	2014-09-10 17:58:16 +0000	[diff] [blame]	542	unsigned AArch64TTI::getMaxInterleaveFactor() const {
Gerolf Hoflehner	7b0abb8	2014-09-10 20:31:57 +0000	[diff] [blame]	543	if (ST->isCortexA57())
James Molloy	a88896b	2014-08-21 00:02:51 +0000	[diff] [blame]	544	return 4;
				545	return 2;
				546	}