Blame - llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp - toolchain/llvm-project

blob: 7c6f55c06bcef1f917adcddca27533b389179b5a [file] [log] [blame]

Chandler Carruth	93dcdc4	2015-01-31 11:17:59 +0000	[diff] [blame]	1	//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	9
Chandler Carruth	93dcdc4	2015-01-31 11:17:59 +0000	[diff] [blame]	10	#include "AArch64TargetTransformInfo.h"
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	11	#include "MCTargetDesc/AArch64AddressingModes.h"
				12	#include "llvm/Analysis/TargetTransformInfo.h"
Kevin Qin	aef6841	2015-03-09 06:14:28 +0000	[diff] [blame]	13	#include "llvm/Analysis/LoopInfo.h"
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	14	#include "llvm/CodeGen/BasicTTIImpl.h"
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	15	#include "llvm/Support/Debug.h"
				16	#include "llvm/Target/CostTable.h"
				17	#include "llvm/Target/TargetLowering.h"
				18	#include <algorithm>
				19	using namespace llvm;
				20
				21	#define DEBUG_TYPE "aarch64tti"
				22
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	23	/// \brief Calculate the cost of materializing a 64-bit value. This helper
				24	/// method might only calculate a fraction of a larger immediate. Therefore it
				25	/// is valid to return a cost of ZERO.
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	26	int AArch64TTIImpl::getIntImmCost(int64_t Val) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	27	// Check if the immediate can be encoded within an instruction.
				28	if (Val == 0 \|\| AArch64_AM::isLogicalImmediate(Val, 64))
				29	return 0;
				30
				31	if (Val < 0)
				32	Val = ~Val;
				33
				34	// Calculate how many moves we will need to materialize this constant.
				35	unsigned LZ = countLeadingZeros((uint64_t)Val);
				36	return (64 - LZ + 15) / 16;
				37	}
				38
				39	/// \brief Calculate the cost of materializing the given constant.
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	40	int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	41	assert(Ty->isIntegerTy());
				42
				43	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				44	if (BitSize == 0)
				45	return ~0U;
				46
				47	// Sign-extend all constants to a multiple of 64-bit.
				48	APInt ImmVal = Imm;
				49	if (BitSize & 0x3f)
				50	ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
				51
				52	// Split the constant into 64-bit chunks and calculate the cost for each
				53	// chunk.
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	54	int Cost = 0;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	55	for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
				56	APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
				57	int64_t Val = Tmp.getSExtValue();
				58	Cost += getIntImmCost(Val);
				59	}
				60	// We need at least one instruction to materialze the constant.
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	61	return std::max(1, Cost);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	62	}
				63
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	64	int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
				65	const APInt &Imm, Type *Ty) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	66	assert(Ty->isIntegerTy());
				67
				68	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				69	// There is no cost model for constants with a bit size of 0. Return TCC_Free
				70	// here, so that constant hoisting will ignore this constant.
				71	if (BitSize == 0)
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	72	return TTI::TCC_Free;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	73
				74	unsigned ImmIdx = ~0U;
				75	switch (Opcode) {
				76	default:
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	77	return TTI::TCC_Free;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	78	case Instruction::GetElementPtr:
				79	// Always hoist the base address of a GetElementPtr.
				80	if (Idx == 0)
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	81	return 2 * TTI::TCC_Basic;
				82	return TTI::TCC_Free;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	83	case Instruction::Store:
				84	ImmIdx = 0;
				85	break;
				86	case Instruction::Add:
				87	case Instruction::Sub:
				88	case Instruction::Mul:
				89	case Instruction::UDiv:
				90	case Instruction::SDiv:
				91	case Instruction::URem:
				92	case Instruction::SRem:
				93	case Instruction::And:
				94	case Instruction::Or:
				95	case Instruction::Xor:
				96	case Instruction::ICmp:
				97	ImmIdx = 1;
				98	break;
				99	// Always return TCC_Free for the shift value of a shift instruction.
				100	case Instruction::Shl:
				101	case Instruction::LShr:
				102	case Instruction::AShr:
				103	if (Idx == 1)
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	104	return TTI::TCC_Free;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	105	break;
				106	case Instruction::Trunc:
				107	case Instruction::ZExt:
				108	case Instruction::SExt:
				109	case Instruction::IntToPtr:
				110	case Instruction::PtrToInt:
				111	case Instruction::BitCast:
				112	case Instruction::PHI:
				113	case Instruction::Call:
				114	case Instruction::Select:
				115	case Instruction::Ret:
				116	case Instruction::Load:
				117	break;
				118	}
				119
				120	if (Idx == ImmIdx) {
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	121	int NumConstants = (BitSize + 63) / 64;
				122	int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	123	return (Cost <= NumConstants * TTI::TCC_Basic)
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	124	? static_cast<int>(TTI::TCC_Free)
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	125	: Cost;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	126	}
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	127	return AArch64TTIImpl::getIntImmCost(Imm, Ty);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	128	}
				129
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	130	int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
				131	const APInt &Imm, Type *Ty) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	132	assert(Ty->isIntegerTy());
				133
				134	unsigned BitSize = Ty->getPrimitiveSizeInBits();
				135	// There is no cost model for constants with a bit size of 0. Return TCC_Free
				136	// here, so that constant hoisting will ignore this constant.
				137	if (BitSize == 0)
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	138	return TTI::TCC_Free;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	139
				140	switch (IID) {
				141	default:
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	142	return TTI::TCC_Free;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	143	case Intrinsic::sadd_with_overflow:
				144	case Intrinsic::uadd_with_overflow:
				145	case Intrinsic::ssub_with_overflow:
				146	case Intrinsic::usub_with_overflow:
				147	case Intrinsic::smul_with_overflow:
				148	case Intrinsic::umul_with_overflow:
				149	if (Idx == 1) {
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	150	int NumConstants = (BitSize + 63) / 64;
				151	int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	152	return (Cost <= NumConstants * TTI::TCC_Basic)
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	153	? static_cast<int>(TTI::TCC_Free)
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	154	: Cost;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	155	}
				156	break;
				157	case Intrinsic::experimental_stackmap:
				158	if ((Idx < 2) \|\| (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	159	return TTI::TCC_Free;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	160	break;
				161	case Intrinsic::experimental_patchpoint_void:
				162	case Intrinsic::experimental_patchpoint_i64:
				163	if ((Idx < 4) \|\| (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	164	return TTI::TCC_Free;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	165	break;
				166	}
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	167	return AArch64TTIImpl::getIntImmCost(Imm, Ty);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	168	}
				169
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	170	TargetTransformInfo::PopcntSupportKind
				171	AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	172	assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
				173	if (TyWidth == 32 \|\| TyWidth == 64)
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	174	return TTI::PSK_FastHardware;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	175	// TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	176	return TTI::PSK_Software;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	177	}
				178
Matthew Simpson	78fd46b	2017-05-09 20:18:12 +0000	[diff] [blame^]	179	bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
				180	ArrayRef<const Value *> Args) {
				181
				182	// A helper that returns a vector type from the given type. The number of
				183	// elements in type Ty determine the vector width.
				184	auto toVectorTy = [&](Type *ArgTy) {
				185	return VectorType::get(ArgTy->getScalarType(),
				186	DstTy->getVectorNumElements());
				187	};
				188
				189	// Exit early if DstTy is not a vector type whose elements are at least
				190	// 16-bits wide.
				191	if (!DstTy->isVectorTy() \|\| DstTy->getScalarSizeInBits() < 16)
				192	return false;
				193
				194	// Determine if the operation has a widening variant. We consider both the
				195	// "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
				196	// instructions.
				197	//
				198	// TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
				199	// verify that their extending operands are eliminated during code
				200	// generation.
				201	switch (Opcode) {
				202	case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
				203	case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
				204	break;
				205	default:
				206	return false;
				207	}
				208
				209	// To be a widening instruction (either the "wide" or "long" versions), the
				210	// second operand must be a sign- or zero extend having a single user. We
				211	// only consider extends having a single user because they may otherwise not
				212	// be eliminated.
				213	if (Args.size() != 2 \|\|
				214	(!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) \|\|
				215	!Args[1]->hasOneUse())
				216	return false;
				217	auto *Extend = cast<CastInst>(Args[1]);
				218
				219	// Legalize the destination type and ensure it can be used in a widening
				220	// operation.
				221	auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
				222	unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
				223	if (!DstTyL.second.isVector() \|\| DstElTySize != DstTy->getScalarSizeInBits())
				224	return false;
				225
				226	// Legalize the source type and ensure it can be used in a widening
				227	// operation.
				228	Type *SrcTy = toVectorTy(Extend->getSrcTy());
				229	auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
				230	unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
				231	if (!SrcTyL.second.isVector() \|\| SrcElTySize != SrcTy->getScalarSizeInBits())
				232	return false;
				233
				234	// Get the total number of vector elements in the legalized types.
				235	unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
				236	unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
				237
				238	// Return true if the legalized types have the same number of vector elements
				239	// and the destination element type size is twice that of the source type.
				240	return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
				241	}
				242
Jonas Paulsson	fccc7d6	2017-04-12 11:49:08 +0000	[diff] [blame]	243	int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type Dst, Type Src,
				244	const Instruction *I) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	245	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				246	assert(ISD && "Invalid opcode");
				247
Matthew Simpson	78fd46b	2017-05-09 20:18:12 +0000	[diff] [blame^]	248	// If the cast is observable, and it is used by a widening instruction (e.g.,
				249	// uaddl, saddw, etc.), it may be free.
				250	if (I && I->hasOneUse()) {
				251	auto SingleUser = cast<Instruction>(I->user_begin());
				252	SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
				253	if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
				254	// If the cast is the second operand, it is free. We will generate either
				255	// a "wide" or "long" version of the widening instruction.
				256	if (I == SingleUser->getOperand(1))
				257	return 0;
				258	// If the cast is not the second operand, it will be free if it looks the
				259	// same as the second operand. In this case, we will generate a "long"
				260	// version of the widening instruction.
				261	if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
				262	if (I->getOpcode() == Cast->getOpcode() &&
				263	cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
				264	return 0;
				265	}
				266	}
				267
Mehdi Amini	44ede33	2015-07-09 02:09:04 +0000	[diff] [blame]	268	EVT SrcTy = TLI->getValueType(DL, Src);
				269	EVT DstTy = TLI->getValueType(DL, Dst);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	270
				271	if (!SrcTy.isSimple() \|\| !DstTy.isSimple())
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	272	return BaseT::getCastInstrCost(Opcode, Dst, Src);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	273
Craig Topper	4b27576	2015-10-28 04:02:12 +0000	[diff] [blame]	274	static const TypeConversionCostTblEntry
Craig Topper	7bf52c9	2015-10-25 00:27:14 +0000	[diff] [blame]	275	ConversionTbl[] = {
Matthew Simpson	343af07	2015-11-18 18:03:06 +0000	[diff] [blame]	276	{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
				277	{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
				278	{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
				279	{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
Silviu Baranga	b322aa6	2015-08-17 16:05:09 +0000	[diff] [blame]	280
				281	// The number of shll instructions for the extension.
Matthew Simpson	343af07	2015-11-18 18:03:06 +0000	[diff] [blame]	282	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
				283	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
				284	{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
				285	{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
				286	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
				287	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
				288	{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
				289	{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
				290	{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
				291	{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
				292	{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
				293	{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
				294	{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
				295	{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
Silviu Baranga	b322aa6	2015-08-17 16:05:09 +0000	[diff] [blame]	296	{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
				297	{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
				298
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	299	// LowerVectorINT_TO_FP:
				300	{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	301	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	302	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
				303	{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	304	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	305	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	306
				307	// Complex: to v2f32
				308	{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
				309	{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	310	{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	311	{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
				312	{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	313	{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	314
				315	// Complex: to v4f32
				316	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
				317	{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
				318	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
				319	{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
				320
Silviu Baranga	b322aa6	2015-08-17 16:05:09 +0000	[diff] [blame]	321	// Complex: to v8f32
				322	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
				323	{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
				324	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
				325	{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
				326
				327	// Complex: to v16f32
				328	{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
				329	{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
				330
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	331	// Complex: to v2f64
				332	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
				333	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
				334	{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
				335	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
				336	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
				337	{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
				338
				339
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	340	// LowerVectorFP_TO_INT
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	341	{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	342	{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
				343	{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	344	{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	345	{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
				346	{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	347
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	348	// Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	349	{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	350	{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
				351	{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	352	{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	353	{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
				354	{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
				355
				356	// Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
				357	{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
				358	{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
Tim Northover	ef0d760	2014-06-15 09:27:06 +0000	[diff] [blame]	359	{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
Tim Northover	dbecc3b	2014-06-15 09:27:15 +0000	[diff] [blame]	360	{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
				361
				362	// Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
				363	{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
				364	{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
				365	{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
				366	{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
				367	{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
				368	{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	369	};
				370
Craig Topper	ee0c859	2015-10-27 04:14:24 +0000	[diff] [blame]	371	if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
				372	DstTy.getSimpleVT(),
				373	SrcTy.getSimpleVT()))
				374	return Entry->Cost;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	375
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	376	return BaseT::getCastInstrCost(Opcode, Dst, Src);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	377	}
				378
Matthew Simpson	e5dfb08	2016-04-27 15:20:21 +0000	[diff] [blame]	379	int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
				380	VectorType *VecTy,
				381	unsigned Index) {
				382
				383	// Make sure we were given a valid extend opcode.
Matthew Simpson	47bd399	2016-04-27 16:25:04 +0000	[diff] [blame]	384	assert((Opcode == Instruction::SExt \|\| Opcode == Instruction::ZExt) &&
				385	"Invalid opcode");
Matthew Simpson	e5dfb08	2016-04-27 15:20:21 +0000	[diff] [blame]	386
				387	// We are extending an element we extract from a vector, so the source type
				388	// of the extend is the element type of the vector.
				389	auto *Src = VecTy->getElementType();
				390
				391	// Sign- and zero-extends are for integer types only.
				392	assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
				393
				394	// Get the cost for the extract. We compute the cost (if any) for the extend
				395	// below.
				396	auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
				397
				398	// Legalize the types.
				399	auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
				400	auto DstVT = TLI->getValueType(DL, Dst);
				401	auto SrcVT = TLI->getValueType(DL, Src);
				402
				403	// If the resulting type is still a vector and the destination type is legal,
				404	// we may get the extension for free. If not, get the default cost for the
				405	// extend.
				406	if (!VecLT.second.isVector() \|\| !TLI->isTypeLegal(DstVT))
				407	return Cost + getCastInstrCost(Opcode, Dst, Src);
				408
				409	// The destination type should be larger than the element type. If not, get
				410	// the default cost for the extend.
				411	if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
				412	return Cost + getCastInstrCost(Opcode, Dst, Src);
				413
				414	switch (Opcode) {
				415	default:
				416	llvm_unreachable("Opcode should be either SExt or ZExt");
				417
				418	// For sign-extends, we only need a smov, which performs the extension
				419	// automatically.
				420	case Instruction::SExt:
				421	return Cost;
				422
				423	// For zero-extends, the extend is performed automatically by a umov unless
				424	// the destination type is i64 and the element type is i8 or i16.
				425	case Instruction::ZExt:
				426	if (DstVT.getSizeInBits() != 64u \|\| SrcVT.getSizeInBits() == 32u)
				427	return Cost;
				428	}
				429
				430	// If we are unable to perform the extend for free, get the default cost.
				431	return Cost + getCastInstrCost(Opcode, Dst, Src);
				432	}
				433
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	434	int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
				435	unsigned Index) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	436	assert(Val->isVectorTy() && "This must be a vector type");
				437
				438	if (Index != -1U) {
				439	// Legalize the type.
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	440	std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	441
				442	// This type is legalized to a scalar type.
				443	if (!LT.second.isVector())
				444	return 0;
				445
				446	// The type may be split. Normalize the index to the new type.
				447	unsigned Width = LT.second.getVectorNumElements();
				448	Index = Index % Width;
				449
				450	// The element at index zero is already inside the vector.
				451	if (Index == 0)
				452	return 0;
				453	}
				454
				455	// All other insert/extracts cost this much.
Matthias Braun	651cff4	2016-06-02 18:03:53 +0000	[diff] [blame]	456	return ST->getVectorInsertExtractBaseCost();
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	457	}
				458
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	459	int AArch64TTIImpl::getArithmeticInstrCost(
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	460	unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
				461	TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
Mohammed Agabaria	2c96c43	2017-01-11 08:23:37 +0000	[diff] [blame]	462	TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	463	// Legalize the type.
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	464	std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	465
Matthew Simpson	78fd46b	2017-05-09 20:18:12 +0000	[diff] [blame^]	466	// If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
				467	// add in the widening overhead specified by the sub-target. Since the
				468	// extends feeding widening instructions are performed automatically, they
				469	// aren't present in the generated code and have a zero cost. By adding a
				470	// widening overhead here, we attach the total cost of the combined operation
				471	// to the widening instruction.
				472	int Cost = 0;
				473	if (isWideningInstruction(Ty, Opcode, Args))
				474	Cost += ST->getWideningBaseCost();
				475
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	476	int ISD = TLI->InstructionOpcodeToISD(Opcode);
				477
Chad Rosier	70d54ac	2014-09-29 13:59:31 +0000	[diff] [blame]	478	if (ISD == ISD::SDIV &&
				479	Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
				480	Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
				481	// On AArch64, scalar signed division by constants power-of-two are
				482	// normally expanded to the sequence ADD + CMP + SELECT + SRA.
				483	// The OperandValue properties many not be same as that of previous
				484	// operation; conservatively assume OP_None.
Matthew Simpson	78fd46b	2017-05-09 20:18:12 +0000	[diff] [blame^]	485	Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
				486	TargetTransformInfo::OP_None,
				487	TargetTransformInfo::OP_None);
Chad Rosier	70d54ac	2014-09-29 13:59:31 +0000	[diff] [blame]	488	Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
				489	TargetTransformInfo::OP_None,
				490	TargetTransformInfo::OP_None);
				491	Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
				492	TargetTransformInfo::OP_None,
				493	TargetTransformInfo::OP_None);
				494	Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
				495	TargetTransformInfo::OP_None,
				496	TargetTransformInfo::OP_None);
				497	return Cost;
				498	}
				499
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	500	switch (ISD) {
				501	default:
Matthew Simpson	78fd46b	2017-05-09 20:18:12 +0000	[diff] [blame^]	502	return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
				503	Opd1PropInfo, Opd2PropInfo);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	504	case ISD::ADD:
				505	case ISD::MUL:
				506	case ISD::XOR:
				507	case ISD::OR:
				508	case ISD::AND:
				509	// These nodes are marked as 'custom' for combining purposes only.
				510	// We know that they are legal. See LowerAdd in ISelLowering.
Matthew Simpson	78fd46b	2017-05-09 20:18:12 +0000	[diff] [blame^]	511	return (Cost + 1) * LT.first;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	512	}
				513	}
				514
Mohammed Agabaria	23599ba	2017-01-05 14:03:41 +0000	[diff] [blame]	515	int AArch64TTIImpl::getAddressComputationCost(Type Ty, ScalarEvolution SE,
				516	const SCEV *Ptr) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	517	// Address computations in vectorized code with non-consecutive addresses will
				518	// likely result in more instructions compared to scalar code where the
				519	// computation can more often be merged into the index mode. The resulting
				520	// extra micro-ops can significantly decrease throughput.
				521	unsigned NumVectorInstToHideOverhead = 10;
Mohammed Agabaria	23599ba	2017-01-05 14:03:41 +0000	[diff] [blame]	522	int MaxMergeDistance = 64;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	523
Mohammed Agabaria	23599ba	2017-01-05 14:03:41 +0000	[diff] [blame]	524	if (Ty->isVectorTy() && SE &&
				525	!BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	526	return NumVectorInstToHideOverhead;
				527
				528	// In many cases the address computation is not merged into the instruction
				529	// addressing mode.
				530	return 1;
				531	}
				532
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	533	int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Jonas Paulsson	fccc7d6	2017-04-12 11:49:08 +0000	[diff] [blame]	534	Type CondTy, const Instruction I) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	535
				536	int ISD = TLI->InstructionOpcodeToISD(Opcode);
Silviu Baranga	a3e27ed	2015-09-09 15:35:02 +0000	[diff] [blame]	537	// We don't lower some vector selects well that are wider than the register
				538	// width.
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	539	if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
				540	// We would need this many instructions to hide the scalarization happening.
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	541	const int AmortizationCost = 20;
Craig Topper	4b27576	2015-10-28 04:02:12 +0000	[diff] [blame]	542	static const TypeConversionCostTblEntry
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	543	VectorSelectTbl[] = {
Silviu Baranga	a3e27ed	2015-09-09 15:35:02 +0000	[diff] [blame]	544	{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
				545	{ ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
				546	{ ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	547	{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
				548	{ ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
				549	{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
				550	};
				551
Mehdi Amini	44ede33	2015-07-09 02:09:04 +0000	[diff] [blame]	552	EVT SelCondTy = TLI->getValueType(DL, CondTy);
				553	EVT SelValTy = TLI->getValueType(DL, ValTy);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	554	if (SelCondTy.isSimple() && SelValTy.isSimple()) {
Craig Topper	ee0c859	2015-10-27 04:14:24 +0000	[diff] [blame]	555	if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
				556	SelCondTy.getSimpleVT(),
				557	SelValTy.getSimpleVT()))
				558	return Entry->Cost;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	559	}
				560	}
Jonas Paulsson	fccc7d6	2017-04-12 11:49:08 +0000	[diff] [blame]	561	return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	562	}
				563
Evandro Menezes	330e1b8	2017-01-10 23:42:21 +0000	[diff] [blame]	564	int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
Jonas Paulsson	fccc7d6	2017-04-12 11:49:08 +0000	[diff] [blame]	565	unsigned Alignment, unsigned AddressSpace,
				566	const Instruction *I) {
Evandro Menezes	330e1b8	2017-01-10 23:42:21 +0000	[diff] [blame]	567	auto LT = TLI->getTypeLegalizationCost(DL, Ty);
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	568
Matthew Simpson	2c8de19	2016-12-15 18:36:59 +0000	[diff] [blame]	569	if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
Evandro Menezes	330e1b8	2017-01-10 23:42:21 +0000	[diff] [blame]	570	LT.second.is128BitVector() && Alignment < 16) {
				571	// Unaligned stores are extremely inefficient. We don't split all
				572	// unaligned 128-bit stores because the negative impact that has shown in
				573	// practice on inlined block copy code.
				574	// We make such stores expensive so that we will only vectorize if there
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	575	// are 6 other instructions getting vectorized.
Evandro Menezes	330e1b8	2017-01-10 23:42:21 +0000	[diff] [blame]	576	const int AmortizationCost = 6;
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	577
				578	return LT.first * 2 * AmortizationCost;
				579	}
				580
Evandro Menezes	330e1b8	2017-01-10 23:42:21 +0000	[diff] [blame]	581	if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8) &&
				582	Ty->getVectorNumElements() < 8) {
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	583	// We scalarize the loads/stores because there is not v.4b register and we
				584	// have to promote the elements to v.4h.
Evandro Menezes	330e1b8	2017-01-10 23:42:21 +0000	[diff] [blame]	585	unsigned NumVecElts = Ty->getVectorNumElements();
Tim Northover	3b0846e	2014-05-24 12:50:23 +0000	[diff] [blame]	586	unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
				587	// We generate 2 instructions per vector element.
				588	return NumVectorizableInstsToAmortize * NumVecElts * 2;
				589	}
				590
				591	return LT.first;
				592	}
James Molloy	2b8933c	2014-08-05 12:30:34 +0000	[diff] [blame]	593
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	594	int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
				595	unsigned Factor,
				596	ArrayRef<unsigned> Indices,
				597	unsigned Alignment,
				598	unsigned AddressSpace) {
Hao Liu	7ec8ee3	2015-06-26 02:32:07 +0000	[diff] [blame]	599	assert(Factor >= 2 && "Invalid interleave factor");
				600	assert(isa<VectorType>(VecTy) && "Expect a vector type");
				601
				602	if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
				603	unsigned NumElts = VecTy->getVectorNumElements();
Matthew Simpson	1468d3e	2017-04-10 18:34:37 +0000	[diff] [blame]	604	auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
Hao Liu	7ec8ee3	2015-06-26 02:32:07 +0000	[diff] [blame]	605
				606	// ldN/stN only support legal vector types of size 64 or 128 in bits.
Matthew Simpson	aee9771	2017-03-02 15:15:35 +0000	[diff] [blame]	607	// Accesses having vector types that are a multiple of 128 bits can be
				608	// matched to more than one ldN/stN instruction.
Matthew Simpson	1468d3e	2017-04-10 18:34:37 +0000	[diff] [blame]	609	if (NumElts % Factor == 0 &&
				610	TLI->isLegalInterleavedAccessType(SubVecTy, DL))
				611	return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
Hao Liu	7ec8ee3	2015-06-26 02:32:07 +0000	[diff] [blame]	612	}
				613
				614	return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
				615	Alignment, AddressSpace);
				616	}
				617
Chandler Carruth	93205eb	2015-08-05 18:08:10 +0000	[diff] [blame]	618	int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
				619	int Cost = 0;
James Molloy	2b8933c	2014-08-05 12:30:34 +0000	[diff] [blame]	620	for (auto *I : Tys) {
				621	if (!I->isVectorTy())
				622	continue;
				623	if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
				624	Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
				625	getMemoryOpCost(Instruction::Load, I, 128, 0);
				626	}
				627	return Cost;
				628	}
James Molloy	a88896b	2014-08-21 00:02:51 +0000	[diff] [blame]	629
Wei Mi	062c744	2015-05-06 17:12:25 +0000	[diff] [blame]	630	unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
Matthias Braun	651cff4	2016-06-02 18:03:53 +0000	[diff] [blame]	631	return ST->getMaxInterleaveFactor();
James Molloy	a88896b	2014-08-21 00:02:51 +0000	[diff] [blame]	632	}
Kevin Qin	72a799a	2014-10-09 10:13:27 +0000	[diff] [blame]	633
Chandler Carruth	ab5cb36	2015-02-01 14:31:23 +0000	[diff] [blame]	634	void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	635	TTI::UnrollingPreferences &UP) {
Kevin Qin	aef6841	2015-03-09 06:14:28 +0000	[diff] [blame]	636	// Enable partial unrolling and runtime unrolling.
				637	BaseT::getUnrollingPreferences(L, UP);
				638
				639	// For inner loop, it is more likely to be a hot one, and the runtime check
				640	// can be promoted out from LICM pass, so the overhead is less, let's try
				641	// a larger threshold to unroll more loops.
				642	if (L->getLoopDepth() > 1)
				643	UP.PartialThreshold *= 2;
				644
Kevin Qin	72a799a	2014-10-09 10:13:27 +0000	[diff] [blame]	645	// Disable partial & runtime unrolling on -Os.
				646	UP.PartialOptSizeThreshold = 0;
				647	}
Chad Rosier	f9327d6	2015-01-26 22:51:15 +0000	[diff] [blame]	648
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	649	Value AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst Inst,
				650	Type *ExpectedType) {
Chad Rosier	f9327d6	2015-01-26 22:51:15 +0000	[diff] [blame]	651	switch (Inst->getIntrinsicID()) {
				652	default:
				653	return nullptr;
				654	case Intrinsic::aarch64_neon_st2:
				655	case Intrinsic::aarch64_neon_st3:
				656	case Intrinsic::aarch64_neon_st4: {
				657	// Create a struct type
				658	StructType *ST = dyn_cast<StructType>(ExpectedType);
				659	if (!ST)
				660	return nullptr;
				661	unsigned NumElts = Inst->getNumArgOperands() - 1;
				662	if (ST->getNumElements() != NumElts)
				663	return nullptr;
				664	for (unsigned i = 0, e = NumElts; i != e; ++i) {
				665	if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
				666	return nullptr;
				667	}
				668	Value *Res = UndefValue::get(ExpectedType);
				669	IRBuilder<> Builder(Inst);
				670	for (unsigned i = 0, e = NumElts; i != e; ++i) {
				671	Value *L = Inst->getArgOperand(i);
				672	Res = Builder.CreateInsertValue(Res, L, i);
				673	}
				674	return Res;
				675	}
				676	case Intrinsic::aarch64_neon_ld2:
				677	case Intrinsic::aarch64_neon_ld3:
				678	case Intrinsic::aarch64_neon_ld4:
				679	if (Inst->getType() == ExpectedType)
				680	return Inst;
				681	return nullptr;
				682	}
				683	}
				684
Chandler Carruth	705b185	2015-01-31 03:43:40 +0000	[diff] [blame]	685	bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
				686	MemIntrinsicInfo &Info) {
Chad Rosier	f9327d6	2015-01-26 22:51:15 +0000	[diff] [blame]	687	switch (Inst->getIntrinsicID()) {
				688	default:
				689	break;
				690	case Intrinsic::aarch64_neon_ld2:
				691	case Intrinsic::aarch64_neon_ld3:
				692	case Intrinsic::aarch64_neon_ld4:
				693	Info.ReadMem = true;
				694	Info.WriteMem = false;
Chad Rosier	f9327d6	2015-01-26 22:51:15 +0000	[diff] [blame]	695	Info.PtrVal = Inst->getArgOperand(0);
				696	break;
				697	case Intrinsic::aarch64_neon_st2:
				698	case Intrinsic::aarch64_neon_st3:
				699	case Intrinsic::aarch64_neon_st4:
				700	Info.ReadMem = false;
				701	Info.WriteMem = true;
Chad Rosier	f9327d6	2015-01-26 22:51:15 +0000	[diff] [blame]	702	Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
				703	break;
				704	}
				705
				706	switch (Inst->getIntrinsicID()) {
				707	default:
				708	return false;
				709	case Intrinsic::aarch64_neon_ld2:
				710	case Intrinsic::aarch64_neon_st2:
				711	Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
				712	break;
				713	case Intrinsic::aarch64_neon_ld3:
				714	case Intrinsic::aarch64_neon_st3:
				715	Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
				716	break;
				717	case Intrinsic::aarch64_neon_ld4:
				718	case Intrinsic::aarch64_neon_st4:
				719	Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
				720	break;
				721	}
				722	return true;
				723	}
Adam Nemet	53e758f	2016-03-18 00:27:29 +0000	[diff] [blame]	724
Jun Bum Lim	dee5565	2017-04-03 19:20:07 +0000	[diff] [blame]	725	/// See if \p I should be considered for address type promotion. We check if \p
				726	/// I is a sext with right type and used in memory accesses. If it used in a
				727	/// "complex" getelementptr, we allow it to be promoted without finding other
				728	/// sext instructions that sign extended the same initial value. A getelementptr
				729	/// is considered as "complex" if it has more than 2 operands.
				730	bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
				731	const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
				732	bool Considerable = false;
				733	AllowPromotionWithoutCommonHeader = false;
				734	if (!isa<SExtInst>(&I))
				735	return false;
				736	Type *ConsideredSExtType =
				737	Type::getInt64Ty(I.getParent()->getParent()->getContext());
				738	if (I.getType() != ConsideredSExtType)
				739	return false;
				740	// See if the sext is the one with the right type and used in at least one
				741	// GetElementPtrInst.
				742	for (const User *U : I.users()) {
				743	if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
				744	Considerable = true;
				745	// A getelementptr is considered as "complex" if it has more than 2
				746	// operands. We will promote a SExt used in such complex GEP as we
				747	// expect some computation to be merged if they are done on 64 bits.
				748	if (GEPInst->getNumOperands() > 2) {
				749	AllowPromotionWithoutCommonHeader = true;
				750	break;
				751	}
				752	}
				753	}
				754	return Considerable;
				755	}
				756
Adam Nemet	53e758f	2016-03-18 00:27:29 +0000	[diff] [blame]	757	unsigned AArch64TTIImpl::getCacheLineSize() {
Matthias Braun	651cff4	2016-06-02 18:03:53 +0000	[diff] [blame]	758	return ST->getCacheLineSize();
Adam Nemet	53e758f	2016-03-18 00:27:29 +0000	[diff] [blame]	759	}
				760
				761	unsigned AArch64TTIImpl::getPrefetchDistance() {
Matthias Braun	651cff4	2016-06-02 18:03:53 +0000	[diff] [blame]	762	return ST->getPrefetchDistance();
Adam Nemet	53e758f	2016-03-18 00:27:29 +0000	[diff] [blame]	763	}
Adam Nemet	6d8beec	2016-03-18 00:27:38 +0000	[diff] [blame]	764
				765	unsigned AArch64TTIImpl::getMinPrefetchStride() {
Matthias Braun	651cff4	2016-06-02 18:03:53 +0000	[diff] [blame]	766	return ST->getMinPrefetchStride();
Adam Nemet	6d8beec	2016-03-18 00:27:38 +0000	[diff] [blame]	767	}
Adam Nemet	709e304	2016-03-18 00:27:43 +0000	[diff] [blame]	768
				769	unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
Matthias Braun	651cff4	2016-06-02 18:03:53 +0000	[diff] [blame]	770	return ST->getMaxPrefetchIterationsAhead();
Adam Nemet	709e304	2016-03-18 00:27:43 +0000	[diff] [blame]	771	}