//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "aarch64tti"

bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
                                         const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // Inline a callee if its target-features are a subset of the caller's
  // target-features.
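  // E.g. (illustrative): a callee built with "+neon" can be inlined into a
  // caller built with "+neon,+crypto", but not the other way around.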
  return (CallerBits & CalleeBits) == CalleeBits;
}

/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
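/// For example (illustrative): Val = 0x1234567812345678 has 3 leading zero
/// bits, so the formula below gives (64 - 3 + 15) / 16 = 4, i.e. one MOVZ
/// plus three MOVK instructions.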
int AArch64TTIImpl::getIntImmCost(int64_t Val) {
  // Check if the immediate can be encoded within an instruction.
  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
    return 0;

  if (Val < 0)
    Val = ~Val;

  // Calculate how many moves we will need to materialize this constant.
  unsigned LZ = countLeadingZeros((uint64_t)Val);
  return (64 - LZ + 15) / 16;
}

/// \brief Calculate the cost of materializing the given constant.
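/// Illustrative note: constants wider than 64 bits are split into 64-bit
/// chunks below; e.g. an i128 immediate costs the sum of its two chunk costs,
/// but never less than one instruction.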
int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  // Sign-extend all constants to a multiple of 64 bits.
  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  // Split the constant into 64-bit chunks and calculate the cost for each
  // chunk.
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
    int64_t Val = Tmp.getSExtValue();
    Cost += getIntImmCost(Val);
  }
  // We need at least one instruction to materialize the constant.
  return std::max(1, Cost);
}

int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::Store:
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TTI::TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    int NumConstants = (BitSize + 63) / 64;
    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
    return (Cost <= NumConstants * TTI::TCC_Basic)
               ? static_cast<int>(TTI::TCC_Free)
               : Cost;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}

int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      int NumConstants = (BitSize + 63) / 64;
      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
      return (Cost <= NumConstants * TTI::TCC_Basic)
                 ? static_cast<int>(TTI::TCC_Free)
                 : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}

TargetTransformInfo::PopcntSupportKind
AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (TyWidth == 32 || TyWidth == 64)
    return TTI::PSK_FastHardware;
  // TODO: AArch64TargetLowering::LowerCTPOP() supports 128-bit popcount.
  return TTI::PSK_Software;
}

bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           ArrayRef<const Value *> Args) {

  // A helper that returns a vector type from the given type. The number of
  // elements in type Ty determines the vector width.
  auto toVectorTy = [&](Type *ArgTy) {
    return VectorType::get(ArgTy->getScalarType(),
                           DstTy->getVectorNumElements());
  };

  // Exit early if DstTy is not a vector type whose elements are at least
  // 16 bits wide.
  if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
    return false;

  // Determine if the operation has a widening variant. We consider both the
  // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
  // instructions.
  //
  // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
  // verify that their extending operands are eliminated during code
  // generation.
  switch (Opcode) {
  case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
  case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
    break;
  default:
    return false;
  }
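  // Illustrative example: in the IR sequence
  //   %e = sext <8 x i8> %a to <8 x i16>
  //   %r = add <8 x i16> %b, %e
  // the add can become saddw and the sext is folded away, so the extend is
  // free when it is the single-use second operand checked below.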

  // To be a widening instruction (either the "wide" or "long" versions), the
  // second operand must be a sign- or zero-extend having a single user. We
  // only consider extends having a single user because they may otherwise not
  // be eliminated.
  if (Args.size() != 2 ||
      (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
      !Args[1]->hasOneUse())
    return false;
  auto *Extend = cast<CastInst>(Args[1]);

  // Legalize the destination type and ensure it can be used in a widening
  // operation.
  auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
  unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
  if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
    return false;

  // Legalize the source type and ensure it can be used in a widening
  // operation.
  Type *SrcTy = toVectorTy(Extend->getSrcTy());
  auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
  if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
    return false;

  // Get the total number of vector elements in the legalized types.
  unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
  unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();

  // Return true if the legalized types have the same number of vector elements
  // and the destination element type size is twice that of the source type.
  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
}

int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                     const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // If the cast is observable and used by a widening instruction (e.g.,
  // uaddl, saddw, etc.), it may be free.
  if (I && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
      // If the cast is the second operand, it is free. We will generate either
      // a "wide" or "long" version of the widening instruction.
      if (I == SingleUser->getOperand(1))
        return 0;
      // If the cast is not the second operand, it will be free if it looks the
      // same as the second operand. In this case, we will generate a "long"
      // version of the widening instruction.
      if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
        if (I->getOpcode() == Cast->getOpcode() &&
            cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
          return 0;
    }
  }

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  static const TypeConversionCostTblEntry
  ConversionTbl[] = {
    { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32,  1 },
    { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64,  0 },
    { ISD::TRUNCATE, MVT::v8i8,  MVT::v8i32,  3 },
    { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },

    // The number of shll instructions for the extension.
    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
    { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i32, 2 },
    { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i32, 2 },
    { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i8,  3 },
    { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i8,  3 },
    { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i16, 2 },
    { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i16, 2 },
    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i8,  7 },
    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i8,  7 },
    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i16, 6 },
    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i16, 6 },
    { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
    { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },

    // LowerVectorINT_TO_FP:
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },

    // Complex: to v2f32
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8,  3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8,  3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },

    // Complex: to v4f32
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8,  4 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8,  3 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },

    // Complex: to v8f32
    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8,  10 },
    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8,  10 },
    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },

    // Complex: to v16f32
    { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
    { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },

    // Complex: to v2f64
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8,  4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8,  4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },

    // LowerVectorFP_TO_INT
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },

    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i8,  MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f32, 1 },

    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
    { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_SINT, MVT::v4i8,  MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i8,  MVT::v4f32, 2 },

    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i8,  MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f64, 2 },
  };

  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
                                                 DstTy.getSimpleVT(),
                                                 SrcTy.getSimpleVT()))
    return Entry->Cost;

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

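// Illustrative note: on AArch64, extracting a lane and sign-extending it can
// be a single smov, while a zero-extend uses umov, which is free except when
// the destination is i64 and the element type is i8 or i16.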
int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                             VectorType *VecTy,
                                             unsigned Index) {

  // Make sure we were given a valid extend opcode.
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
         "Invalid opcode");

  // We are extending an element we extract from a vector, so the source type
  // of the extend is the element type of the vector.
  auto *Src = VecTy->getElementType();

  // Sign- and zero-extends are for integer types only.
  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

  // Get the cost for the extract. We compute the cost (if any) for the extend
  // below.
  auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);

  // Legalize the types.
  auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  // If the resulting type is still a vector and the destination type is legal,
  // we may get the extension for free. If not, get the default cost for the
  // extend.
  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
    return Cost + getCastInstrCost(Opcode, Dst, Src);

  // The destination type should be larger than the element type. If not, get
  // the default cost for the extend.
  if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
    return Cost + getCastInstrCost(Opcode, Dst, Src);

  switch (Opcode) {
  default:
    llvm_unreachable("Opcode should be either SExt or ZExt");

  // For sign-extends, we only need a smov, which performs the extension
  // automatically.
  case Instruction::SExt:
    return Cost;

  // For zero-extends, the extend is performed automatically by a umov unless
  // the destination type is i64 and the element type is i8 or i16.
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
      return Cost;
  }

  // If we are unable to perform the extend for free, get the default cost.
  return Cost + getCastInstrCost(Opcode, Dst, Src);
}

int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                       unsigned Index) {
  assert(Val->isVectorTy() && "This must be a vector type");

  if (Index != -1U) {
    // Legalize the type.
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
      return 0;

    // The type may be split. Normalize the index to the new type.
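    // E.g. (illustrative), v4i64 legalizes to two v2i64 registers, so index 3
    // becomes index 1 within the second register.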
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

    // The element at index zero is already inside the vector.
    if (Index == 0)
      return 0;
  }

  // All other inserts/extracts cost this much.
  return ST->getVectorInsertExtractBaseCost();
}

int AArch64TTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
  // add in the widening overhead specified by the sub-target. Since the
  // extends feeding widening instructions are performed automatically, they
  // aren't present in the generated code and have a zero cost. By adding a
  // widening overhead here, we attach the total cost of the combined operation
  // to the widening instruction.
  int Cost = 0;
  if (isWideningInstruction(Ty, Opcode, Args))
    Cost += ST->getWideningBaseCost();

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (ISD == ISD::SDIV &&
      Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
      Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
    // On AArch64, scalar signed division by a power-of-two constant is
    // normally expanded to the sequence ADD + CMP + SELECT + SRA.
    // The OperandValue properties may not be the same as those of the
    // previous operation; conservatively assume OP_None.
    Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    return Cost;
  }

  switch (ISD) {
  default:
    return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                                Opd1PropInfo, Opd2PropInfo);
  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // These nodes are marked as 'custom' for combining purposes only.
    // We know that they are legal. See LowerAdd in ISelLowering.
    return (Cost + 1) * LT.first;
  }
}

int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                              const SCEV *Ptr) {
  // Address computations in vectorized code with non-consecutive addresses
  // will likely result in more instructions compared to scalar code where the
  // computation can more often be merged into the index mode. The resulting
  // extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (Ty->isVectorTy() && SE &&
      !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}

int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                       Type *CondTy, const Instruction *I) {

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // We don't lower some vector selects well when they are wider than the
  // register width.
  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // We would need this many instructions to hide the scalarization happening.
    const int AmortizationCost = 20;
    static const TypeConversionCostTblEntry
    VectorSelectTbl[] = {
      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i32,  8 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
      { ISD::SELECT, MVT::v4i1,  MVT::v4i64,  4 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i64,  8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }
  }
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}

int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
                                    unsigned Alignment, unsigned AddressSpace,
                                    const Instruction *I) {
  auto LT = TLI->getTypeLegalizationCost(DL, Ty);

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && Alignment < 16) {
    // Unaligned stores are extremely inefficient. We don't split all
    // unaligned 128-bit stores because of the negative impact that has been
    // observed in practice on inlined block copy code.
    // We make such stores expensive so that we will only vectorize if there
    // are 6 other instructions getting vectorized.
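    // E.g. (illustrative), a single unaligned <4 x i32> store (LT.first == 1)
    // is costed at 1 * 2 * 6 = 12.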
    const int AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;
  }

  if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8) &&
      Ty->getVectorNumElements() < 8) {
    // We scalarize the loads/stores because there is no v.4b register and we
    // have to promote the elements to v.4h.
    unsigned NumVecElts = Ty->getVectorNumElements();
    unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
    // We generate 2 instructions per vector element.
    return NumVectorizableInstsToAmortize * NumVecElts * 2;
  }

  return LT.first;
}

int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                               unsigned Factor,
                                               ArrayRef<unsigned> Indices,
                                               unsigned Alignment,
                                               unsigned AddressSpace) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned NumElts = VecTy->getVectorNumElements();
    auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);

    // ldN/stN only support legal vector types of size 64 or 128 in bits.
    // Accesses having vector types that are a multiple of 128 bits can be
    // matched to more than one ldN/stN instruction.
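    // E.g. (illustrative), a Factor == 2 access with VecTy <8 x i32> uses
    // SubVecTy <4 x i32> (128 bits, legal), so the returned cost is 2 * 1 = 2.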
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace);
}

int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
  int Cost = 0;
  for (auto *I : Tys) {
    if (!I->isVectorTy())
      continue;
    if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
      Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
              getMemoryOpCost(Instruction::Load, I, 128, 0);
  }
  return Cost;
}

unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
  return ST->getMaxInterleaveFactor();
}

void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                             TTI::UnrollingPreferences &UP) {
  // Enable partial unrolling and runtime unrolling.
  BaseT::getUnrollingPreferences(L, SE, UP);

  // An inner loop is more likely to be hot, and its runtime check can be
  // hoisted out by the LICM pass, so the overhead is lower; try a larger
  // threshold to unroll more inner loops.
  if (L->getLoopDepth() > 1)
    UP.PartialThreshold *= 2;

  // Disable partial & runtime unrolling on -Os.
  UP.PartialOptSizeThreshold = 0;
}

Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                         Type *ExpectedType) {
  switch (Inst->getIntrinsicID()) {
  default:
    return nullptr;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
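    // E.g. (illustrative), for a call st2(%a, %b, %ptr) and an ExpectedType of
    // { <4 x i32>, <4 x i32> }, rebuild the stored value as a struct holding
    // %a and %b so a following ldN can reuse it.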
    // Create a struct type
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    if (!ST)
      return nullptr;
    unsigned NumElts = Inst->getNumArgOperands() - 1;
    if (ST->getNumElements() != NumElts)
      return nullptr;
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
        return nullptr;
    }
    Value *Res = UndefValue::get(ExpectedType);
    IRBuilder<> Builder(Inst);
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      Value *L = Inst->getArgOperand(i);
      Res = Builder.CreateInsertValue(Res, L, i);
    }
    return Res;
  }
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
      return Inst;
    return nullptr;
  }
}

bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                        MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
    Info.PtrVal = Inst->getArgOperand(0);
    break;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;
    Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
    break;
  }

  switch (Inst->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    break;
  }
  return true;
}

/// See if \p I should be considered for address type promotion. We check if
/// \p I is a sext with the right type and used in memory accesses. If it is
/// used in a "complex" getelementptr, we allow it to be promoted without
/// finding other sext instructions that sign-extended the same initial value.
/// A getelementptr is considered "complex" if it has more than 2 operands.
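/// For example (illustrative), the sext in
///   %idx = sext i32 %i to i64
///   %p = getelementptr [16 x i32], [16 x i32]* %base, i64 %idx, i64 %j
/// feeds a GEP with more than two operands, so promotion is allowed without a
/// common header.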
bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
    return false;
  Type *ConsideredSExtType =
      Type::getInt64Ty(I.getParent()->getParent()->getContext());
  if (I.getType() != ConsideredSExtType)
    return false;
  // See if the sext is the one with the right type and used in at least one
  // GetElementPtrInst.
  for (const User *U : I.users()) {
    if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
      Considerable = true;
      // A getelementptr is considered as "complex" if it has more than 2
      // operands. We will promote a SExt used in such a complex GEP as we
      // expect some computation to be merged if they are done on 64 bits.
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
        break;
      }
    }
  }
  return Considerable;
}

unsigned AArch64TTIImpl::getCacheLineSize() {
  return ST->getCacheLineSize();
}

unsigned AArch64TTIImpl::getPrefetchDistance() {
  return ST->getPrefetchDistance();
}

unsigned AArch64TTIImpl::getMinPrefetchStride() {
  return ST->getMinPrefetchStride();
}

unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
  return ST->getMaxPrefetchIterationsAhead();
}

bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                           TTI::ReductionFlags Flags) const {
  assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
  unsigned ScalarBits = Ty->getScalarSizeInBits();
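  // E.g. (illustrative), an add reduction over <4 x i32> (4 * 32 == 128 bits)
  // uses the intrinsic, while one over <4 x i16> (64 bits) does not.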
  switch (Opcode) {
  case Instruction::FAdd:
  case Instruction::FMul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Mul:
    return false;
  case Instruction::Add:
    return ScalarBits * Ty->getVectorNumElements() >= 128;
  case Instruction::ICmp:
    return (ScalarBits < 64) &&
           (ScalarBits * Ty->getVectorNumElements() >= 128);
  case Instruction::FCmp:
    return Flags.NoNaN;
  default:
    llvm_unreachable("Unhandled reduction opcode");
  }
  return false;
}