blob: f1e9c6adb5312691843cf1652ac4ec870f24b05a [file] [log] [blame]
Tim Northover3b0846e2014-05-24 12:50:23 +00001//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10/// This file implements a TargetTransformInfo analysis pass specific to the
11/// AArch64 target machine. It uses the target's detailed information to provide
12/// more precise answers to certain TTI queries, while letting the target
13/// independent and default TTI implementations handle the rest.
14///
15//===----------------------------------------------------------------------===//
16
17#include "AArch64.h"
18#include "AArch64TargetMachine.h"
19#include "MCTargetDesc/AArch64AddressingModes.h"
20#include "llvm/Analysis/TargetTransformInfo.h"
Chandler Carruth705b1852015-01-31 03:43:40 +000021#include "llvm/CodeGen/BasicTTIImpl.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000022#include "llvm/Support/Debug.h"
23#include "llvm/Target/CostTable.h"
24#include "llvm/Target/TargetLowering.h"
25#include <algorithm>
26using namespace llvm;
27
28#define DEBUG_TYPE "aarch64tti"
29
namespace {

/// TargetTransformInfo implementation for AArch64. Overrides the pieces of
/// BasicTTIImplBase where AArch64 has more precise cost information, and
/// defers everything else to the generic implementation.
class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  typedef BasicTTIImplBase<AArch64TTIImpl> BaseT;
  typedef TargetTransformInfo TTI;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the result needs to be inserted and/or extracted from vectors.
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);

  /// Identifiers used as MatchingId in getTgtMemIntrinsic so that ldN/stN
  /// intrinsics touching the same number of vector elements can be matched.
  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

public:
  // ST/TLI are derived from the target machine; all three may be null when
  // no TM is supplied (the default).
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM = nullptr)
      : BaseT(TM), ST(TM ? TM->getSubtargetImpl() : nullptr),
        TLI(ST ? ST->getTargetLowering() : nullptr) {}

  // Provide value semantics. MSVC requires that we spell all of these out.
  AArch64TTIImpl(const AArch64TTIImpl &Arg)
      : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
  AArch64TTIImpl(AArch64TTIImpl &&Arg)
      : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
        TLI(std::move(Arg.TLI)) {}
  AArch64TTIImpl &operator=(const AArch64TTIImpl &RHS) {
    BaseT::operator=(static_cast<const BaseT &>(RHS));
    ST = RHS.ST;
    TLI = RHS.TLI;
    return *this;
  }
  AArch64TTIImpl &operator=(AArch64TTIImpl &&RHS) {
    BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
    ST = std::move(RHS.ST);
    TLI = std::move(RHS.TLI);
    return *this;
  }

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  unsigned getIntImmCost(int64_t Val);
  unsigned getIntImmCost(const APInt &Imm, Type *Ty);
  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty);
  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  // 32 vector registers with NEON (0 without), 31 general-purpose registers.
  unsigned getNumberOfRegisters(bool Vector) {
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  // 128-bit vector registers with NEON (0 without), 64-bit scalar registers.
  unsigned getRegisterBitWidth(bool Vector) {
    if (Vector) {
      if (ST->hasNEON())
        return 128;
      return 0;
    }
    return 64;
  }

  unsigned getMaxInterleaveFactor();

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

  unsigned getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);

  unsigned getAddressComputationCost(Type *Ty, bool IsComplex);

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace);

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(const Function *F, Loop *L,
                               TTI::UnrollingPreferences &UP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);

  /// @}
};

} // end anonymous namespace
142
/// Create an ImmutablePass that exposes the AArch64 TTI implementation for
/// \p TM through the TargetTransformInfo analysis wrapper.
ImmutablePass *
llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) {
  return new TargetTransformInfoWrapperPass(AArch64TTIImpl(TM));
}
147
148/// \brief Calculate the cost of materializing a 64-bit value. This helper
149/// method might only calculate a fraction of a larger immediate. Therefore it
150/// is valid to return a cost of ZERO.
Chandler Carruth705b1852015-01-31 03:43:40 +0000151unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000152 // Check if the immediate can be encoded within an instruction.
153 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
154 return 0;
155
156 if (Val < 0)
157 Val = ~Val;
158
159 // Calculate how many moves we will need to materialize this constant.
160 unsigned LZ = countLeadingZeros((uint64_t)Val);
161 return (64 - LZ + 15) / 16;
162}
163
/// \brief Calculate the cost of materializing the given constant.
unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // A bit size of zero gives us nothing to cost; report "maximally expensive".
  if (BitSize == 0)
    return ~0U;

  // Sign-extend all constants to a multiple of 64-bit.
  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  // Split the constant into 64-bit chunks and calculate the cost for each
  // chunk.
  unsigned Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
    int64_t Val = Tmp.getSExtValue();
    Cost += getIntImmCost(Val);
  }
  // We need at least one instruction to materialize the constant.
  return std::max(1U, Cost);
}
188
/// \brief Cost of materializing \p Imm as operand \p Idx of an instruction
/// with opcode \p Opcode. Used by constant hoisting: returning TCC_Free tells
/// the pass to leave the constant where it is.
unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
                                       const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  // ImmIdx is the operand position at which an immediate can be folded into
  // the instruction; any other position forces materialization.
  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::Store:
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TTI::TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    // Cheap-enough immediates (at most one basic op per 64-bit chunk) are
    // treated as free so they are not hoisted.
    unsigned NumConstants = (BitSize + 63) / 64;
    unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
    return (Cost <= NumConstants * TTI::TCC_Basic)
               ? static_cast<unsigned>(TTI::TCC_Free)
               : Cost;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
254
/// \brief Cost of materializing \p Imm as operand \p Idx of intrinsic \p IID,
/// for constant hoisting. TCC_Free means "leave the constant in place".
unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                       const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      // Same cheapness test as the instruction form: immediates that cost no
      // more than one basic op per 64-bit chunk stay put.
      unsigned NumConstants = (BitSize + 63) / 64;
      unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
      return (Cost <= NumConstants * TTI::TCC_Basic)
                 ? static_cast<unsigned>(TTI::TCC_Free)
                 : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    // The first two stackmap operands, and any 64-bit-representable value,
    // are free.
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    // The first four patchpoint operands, and any 64-bit-representable value,
    // are free.
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
294
Chandler Carruth705b1852015-01-31 03:43:40 +0000295TargetTransformInfo::PopcntSupportKind
296AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000297 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
298 if (TyWidth == 32 || TyWidth == 64)
Chandler Carruth705b1852015-01-31 03:43:40 +0000299 return TTI::PSK_FastHardware;
Tim Northover3b0846e2014-05-24 12:50:23 +0000300 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
Chandler Carruth705b1852015-01-31 03:43:40 +0000301 return TTI::PSK_Software;
Tim Northover3b0846e2014-05-24 12:50:23 +0000302}
303
/// \brief Cost of a cast from \p Src to \p Dst. Uses a hand-maintained table
/// of conversion costs for simple vector types; everything else falls back to
/// the generic implementation.
unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                          Type *Src) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  EVT SrcTy = TLI->getValueType(Src);
  EVT DstTy = TLI->getValueType(Dst);

  // The table below only covers simple value types.
  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
    // LowerVectorINT_TO_FP:
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },

    // Complex: to v2f32
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },

    // Complex: to v4f32
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },

    // Complex: to v2f64
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },


    // LowerVectorFP_TO_INT
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },

    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },

    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
    { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },

    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
  };

  int Idx = ConvertCostTableLookup<MVT>(
      ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(),
      SrcTy.getSimpleVT());
  if (Idx != -1)
    return ConversionTbl[Idx].Cost;

  // No table entry: defer to the generic cost model.
  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
386
Chandler Carruth705b1852015-01-31 03:43:40 +0000387unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
388 unsigned Index) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000389 assert(Val->isVectorTy() && "This must be a vector type");
390
391 if (Index != -1U) {
392 // Legalize the type.
393 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
394
395 // This type is legalized to a scalar type.
396 if (!LT.second.isVector())
397 return 0;
398
399 // The type may be split. Normalize the index to the new type.
400 unsigned Width = LT.second.getVectorNumElements();
401 Index = Index % Width;
402
403 // The element at index zero is already inside the vector.
404 if (Index == 0)
405 return 0;
406 }
407
408 // All other insert/extracts cost this much.
409 return 2;
410}
411
/// \brief Cost of an arithmetic instruction of \p Opcode on \p Ty, given
/// operand-value information from the caller.
unsigned AArch64TTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo) {
  // Legalize the type.
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (ISD == ISD::SDIV &&
      Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
      Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
    // On AArch64, scalar signed division by constants power-of-two are
    // normally expanded to the sequence ADD + CMP + SELECT + SRA.
    // The OperandValue properties may not be the same as those of the
    // previous operation; conservatively assume OP_None.
    unsigned Cost =
        getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
                               TargetTransformInfo::OP_None,
                               TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    return Cost;
  }

  switch (ISD) {
  default:
    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                         Opd1PropInfo, Opd2PropInfo);
  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // These nodes are marked as 'custom' for combining purposes only.
    // We know that they are legal. See LowerAdd in ISelLowering.
    return 1 * LT.first;
  }
}
458
Chandler Carruth705b1852015-01-31 03:43:40 +0000459unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000460 // Address computations in vectorized code with non-consecutive addresses will
461 // likely result in more instructions compared to scalar code where the
462 // computation can more often be merged into the index mode. The resulting
463 // extra micro-ops can significantly decrease throughput.
464 unsigned NumVectorInstToHideOverhead = 10;
465
466 if (Ty->isVectorTy() && IsComplex)
467 return NumVectorInstToHideOverhead;
468
469 // In many cases the address computation is not merged into the instruction
470 // addressing mode.
471 return 1;
472}
473
/// \brief Cost of a compare or select of \p ValTy with condition type
/// \p CondTy. Wide vector selects are penalized via a lookup table.
unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                            Type *CondTy) {

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // We don't lower vector selects well that are wider than the register width.
  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // We would need this many instructions to hide the scalarization happening.
    unsigned AmortizationCost = 20;
    static const TypeConversionCostTblEntry<MVT::SimpleValueType>
    VectorSelectTbl[] = {
      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
    };

    EVT SelCondTy = TLI->getValueType(CondTy);
    EVT SelValTy = TLI->getValueType(ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      int Idx =
          ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
                                 SelValTy.getSimpleVT());
      if (Idx != -1)
        return VectorSelectTbl[Idx].Cost;
    }
  }
  // Everything else defers to the generic cost model.
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
504
/// \brief Cost of a load/store of type \p Src with the given alignment and
/// address space.
unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                         unsigned Alignment,
                                         unsigned AddressSpace) {
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);

  if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
      Src->getVectorElementType()->isIntegerTy(64)) {
    // Unaligned stores are extremely inefficient. We don't split unaligned
    // v2i64 stores because of the negative impact that has shown in practice
    // on inlined memcpy code.
    // We make v2i64 stores expensive so that we will only vectorize if there
    // are 6 other instructions getting vectorized.
    unsigned AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;
  }

  if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
      Src->getVectorNumElements() < 8) {
    // We scalarize the loads/stores because there is no v.4b register and we
    // have to promote the elements to v.4h.
    unsigned NumVecElts = Src->getVectorNumElements();
    unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
    // We generate 2 instructions per vector element.
    return NumVectorizableInstsToAmortize * NumVecElts * 2;
  }

  // Default: cost scales with the number of legalized parts.
  return LT.first;
}
James Molloy2b8933c2014-08-05 12:30:34 +0000534
Chandler Carruth705b1852015-01-31 03:43:40 +0000535unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
James Molloy2b8933c2014-08-05 12:30:34 +0000536 unsigned Cost = 0;
537 for (auto *I : Tys) {
538 if (!I->isVectorTy())
539 continue;
540 if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
541 Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
542 getMemoryOpCost(Instruction::Load, I, 128, 0);
543 }
544 return Cost;
545}
James Molloya88896b2014-08-21 00:02:51 +0000546
Chandler Carruth705b1852015-01-31 03:43:40 +0000547unsigned AArch64TTIImpl::getMaxInterleaveFactor() {
Gerolf Hoflehner7b0abb82014-09-10 20:31:57 +0000548 if (ST->isCortexA57())
James Molloya88896b2014-08-21 00:02:51 +0000549 return 4;
550 return 2;
551}
Kevin Qin72a799a2014-10-09 10:13:27 +0000552
/// \brief Tune loop-unrolling preferences for AArch64.
void AArch64TTIImpl::getUnrollingPreferences(const Function *F, Loop *L,
                                             TTI::UnrollingPreferences &UP) {
  // Disable partial & runtime unrolling on -Os.
  UP.PartialOptSizeThreshold = 0;
}
Chad Rosierf9327d62015-01-26 22:51:15 +0000558
/// \brief Given a NEON structured memory intrinsic, produce a value of
/// \p ExpectedType from it (for EarlyCSE-style forwarding), or nullptr if
/// that is not possible.
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                         Type *ExpectedType) {
  switch (Inst->getIntrinsicID()) {
  default:
    return nullptr;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    // Create a struct type matching the stored operands, element by element.
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    if (!ST)
      return nullptr;
    // The last argument is the pointer operand; all others are stored values.
    unsigned NumElts = Inst->getNumArgOperands() - 1;
    if (ST->getNumElements() != NumElts)
      return nullptr;
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
        return nullptr;
    }
    // Re-pack the stored operands into a struct value of the expected type.
    Value *Res = UndefValue::get(ExpectedType);
    IRBuilder<> Builder(Inst);
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      Value *L = Inst->getArgOperand(i);
      Res = Builder.CreateInsertValue(Res, L, i);
    }
    return Res;
  }
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    // A load already produces a result; reuse it if the type matches exactly.
    if (Inst->getType() == ExpectedType)
      return Inst;
    return nullptr;
  }
}
594
/// \brief Describe the memory behavior of NEON structured load/store
/// intrinsics in \p Info. Returns true if \p Inst was recognized.
bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                        MemIntrinsicInfo &Info) {
  // First pass: fill in the read/write/pointer description.
  switch (Inst->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    // Loads read from the pointer in operand 0.
    Info.ReadMem = true;
    Info.WriteMem = false;
    Info.Vol = false;
    Info.NumMemRefs = 1;
    Info.PtrVal = Inst->getArgOperand(0);
    break;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    // Stores write through the pointer, which is the last operand.
    Info.ReadMem = false;
    Info.WriteMem = true;
    Info.Vol = false;
    Info.NumMemRefs = 1;
    Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
    break;
  }

  // Second pass: assign a MatchingId so an ldN can be paired with the
  // corresponding stN of the same element count.
  switch (Inst->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    break;
  }
  return true;
}