//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "aarch64tti"

bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
                                         const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // Inline a callee if its target-features are a subset of the caller's
  // target-features.
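  // E.g. (illustrative): a callee built with "+neon" can be inlined into a
  // caller built with "+neon,+crypto", but not the other way around.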
  return (CallerBits & CalleeBits) == CalleeBits;
}

/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
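/// For example (illustrative): Val = 0x1234567812345678 has 3 leading zero
/// bits, so the formula below gives (64 - 3 + 15) / 16 = 4, i.e. one MOVZ
/// plus three MOVK instructions.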
int AArch64TTIImpl::getIntImmCost(int64_t Val) {
  // Check if the immediate can be encoded within an instruction.
  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
    return 0;

  if (Val < 0)
    Val = ~Val;

  // Calculate how many moves we will need to materialize this constant.
  unsigned LZ = countLeadingZeros((uint64_t)Val);
  return (64 - LZ + 15) / 16;
}

/// \brief Calculate the cost of materializing the given constant.
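/// Illustrative note: constants wider than 64 bits are split into 64-bit
/// chunks below; e.g. an i128 immediate costs the sum of its two chunk costs,
/// but never less than one instruction.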
int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  // Sign-extend all constants to a multiple of 64 bits.
  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  // Split the constant into 64-bit chunks and calculate the cost for each
  // chunk.
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
    int64_t Val = Tmp.getSExtValue();
    Cost += getIntImmCost(Val);
  }
  // We need at least one instruction to materialize the constant.
  return std::max(1, Cost);
}

int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::Store:
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TTI::TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    int NumConstants = (BitSize + 63) / 64;
    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
    return (Cost <= NumConstants * TTI::TCC_Basic)
               ? static_cast<int>(TTI::TCC_Free)
               : Cost;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}

int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      int NumConstants = (BitSize + 63) / 64;
      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
      return (Cost <= NumConstants * TTI::TCC_Basic)
                 ? static_cast<int>(TTI::TCC_Free)
                 : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}

TargetTransformInfo::PopcntSupportKind
AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (TyWidth == 32 || TyWidth == 64)
    return TTI::PSK_FastHardware;
  // TODO: AArch64TargetLowering::LowerCTPOP() supports 128-bit popcount.
  return TTI::PSK_Software;
}

bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           ArrayRef<const Value *> Args) {

  // A helper that returns a vector type from the given type. The number of
  // elements in type Ty determines the vector width.
  auto toVectorTy = [&](Type *ArgTy) {
    return VectorType::get(ArgTy->getScalarType(),
                           DstTy->getVectorNumElements());
  };

  // Exit early if DstTy is not a vector type whose elements are at least
  // 16 bits wide.
  if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
    return false;

  // Determine if the operation has a widening variant. We consider both the
  // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
  // instructions.
  //
  // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
  // verify that their extending operands are eliminated during code
  // generation.
  switch (Opcode) {
  case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
  case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
    break;
  default:
    return false;
  }
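  // Illustrative example: in the IR sequence
  //   %e = sext <8 x i8> %a to <8 x i16>
  //   %r = add <8 x i16> %b, %e
  // the add can become saddw and the sext is folded away, so the extend is
  // free when it is the single-use second operand checked below.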

  // To be a widening instruction (either the "wide" or "long" versions), the
  // second operand must be a sign- or zero-extend having a single user. We
  // only consider extends having a single user because they may otherwise not
  // be eliminated.
  if (Args.size() != 2 ||
      (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
      !Args[1]->hasOneUse())
    return false;
  auto *Extend = cast<CastInst>(Args[1]);

  // Legalize the destination type and ensure it can be used in a widening
  // operation.
  auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
  unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
  if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
    return false;

  // Legalize the source type and ensure it can be used in a widening
  // operation.
  Type *SrcTy = toVectorTy(Extend->getSrcTy());
  auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
  if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
    return false;

  // Get the total number of vector elements in the legalized types.
  unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
  unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();

  // Return true if the legalized types have the same number of vector elements
  // and the destination element type size is twice that of the source type.
  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
}

int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                     const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // If the cast is observable and used by a widening instruction (e.g.,
  // uaddl, saddw, etc.), it may be free.
  if (I && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
      // If the cast is the second operand, it is free. We will generate either
      // a "wide" or "long" version of the widening instruction.
      if (I == SingleUser->getOperand(1))
        return 0;
      // If the cast is not the second operand, it will be free if it looks the
      // same as the second operand. In this case, we will generate a "long"
      // version of the widening instruction.
      if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
        if (I->getOpcode() == Cast->getOpcode() &&
            cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
          return 0;
    }
  }

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  static const TypeConversionCostTblEntry
  ConversionTbl[] = {
    { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32,  1 },
    { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64,  0 },
    { ISD::TRUNCATE, MVT::v8i8,  MVT::v8i32,  3 },
    { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },

    // The number of shll instructions for the extension.
    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
    { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i32, 2 },
    { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i32, 2 },
    { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i8,  3 },
    { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i8,  3 },
    { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i16, 2 },
    { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i16, 2 },
    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i8,  7 },
    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i8,  7 },
    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i16, 6 },
    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i16, 6 },
    { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
    { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },

    // LowerVectorINT_TO_FP:
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },

    // Complex: to v2f32
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8,  3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8,  3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },

    // Complex: to v4f32
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8,  4 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8,  3 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },

    // Complex: to v8f32
    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8,  10 },
    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8,  10 },
    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },

    // Complex: to v16f32
    { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
    { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },

    // Complex: to v2f64
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8,  4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8,  4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },

    // LowerVectorFP_TO_INT
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },

    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i8,  MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f32, 1 },

    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
    { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_SINT, MVT::v4i8,  MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i8,  MVT::v4f32, 2 },

    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i8,  MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f64, 2 },
  };

  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
                                                 DstTy.getSimpleVT(),
                                                 SrcTy.getSimpleVT()))
    return Entry->Cost;

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

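// Illustrative note: on AArch64, extracting a lane and sign-extending it can
// be a single smov, while a zero-extend uses umov, which is free except when
// the destination is i64 and the element type is i8 or i16.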
int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                             VectorType *VecTy,
                                             unsigned Index) {

  // Make sure we were given a valid extend opcode.
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
         "Invalid opcode");

  // We are extending an element we extract from a vector, so the source type
  // of the extend is the element type of the vector.
  auto *Src = VecTy->getElementType();

  // Sign- and zero-extends are for integer types only.
  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

  // Get the cost for the extract. We compute the cost (if any) for the extend
  // below.
  auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);

  // Legalize the types.
  auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  // If the resulting type is still a vector and the destination type is legal,
  // we may get the extension for free. If not, get the default cost for the
  // extend.
  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
    return Cost + getCastInstrCost(Opcode, Dst, Src);

  // The destination type should be larger than the element type. If not, get
  // the default cost for the extend.
  if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
    return Cost + getCastInstrCost(Opcode, Dst, Src);

  switch (Opcode) {
  default:
    llvm_unreachable("Opcode should be either SExt or ZExt");

  // For sign-extends, we only need a smov, which performs the extension
  // automatically.
  case Instruction::SExt:
    return Cost;

  // For zero-extends, the extend is performed automatically by a umov unless
  // the destination type is i64 and the element type is i8 or i16.
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
      return Cost;
  }

  // If we are unable to perform the extend for free, get the default cost.
  return Cost + getCastInstrCost(Opcode, Dst, Src);
}

int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                       unsigned Index) {
  assert(Val->isVectorTy() && "This must be a vector type");

  if (Index != -1U) {
    // Legalize the type.
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
      return 0;

    // The type may be split. Normalize the index to the new type.
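    // E.g. (illustrative), v4i64 legalizes to two v2i64 registers, so index 3
    // becomes index 1 within the second register.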
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

    // The element at index zero is already inside the vector.
    if (Index == 0)
      return 0;
  }

  // All other inserts/extracts cost this much.
  return ST->getVectorInsertExtractBaseCost();
}

int AArch64TTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
  // add in the widening overhead specified by the sub-target. Since the
  // extends feeding widening instructions are performed automatically, they
  // aren't present in the generated code and have a zero cost. By adding a
  // widening overhead here, we attach the total cost of the combined operation
  // to the widening instruction.
  int Cost = 0;
  if (isWideningInstruction(Ty, Opcode, Args))
    Cost += ST->getWideningBaseCost();

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (ISD == ISD::SDIV &&
      Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
      Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
    // On AArch64, scalar signed division by a power-of-two constant is
    // normally expanded to the sequence ADD + CMP + SELECT + SRA.
    // The OperandValue properties may not be the same as those of the
    // previous operation; conservatively assume OP_None.
    Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    return Cost;
  }

  switch (ISD) {
  default:
    return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                                Opd1PropInfo, Opd2PropInfo);
  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // These nodes are marked as 'custom' for combining purposes only.
    // We know that they are legal. See LowerAdd in ISelLowering.
    return (Cost + 1) * LT.first;
  }
}

int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                              const SCEV *Ptr) {
  // Address computations in vectorized code with non-consecutive addresses
  // will likely result in more instructions compared to scalar code where the
  // computation can more often be merged into the index mode. The resulting
  // extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (Ty->isVectorTy() && SE &&
      !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}

int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                       Type *CondTy, const Instruction *I) {

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // We don't lower some vector selects well when they are wider than the
  // register width.
  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // We would need this many instructions to hide the scalarization happening.
    const int AmortizationCost = 20;
    static const TypeConversionCostTblEntry
    VectorSelectTbl[] = {
      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i32,  8 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
      { ISD::SELECT, MVT::v4i1,  MVT::v4i64,  4 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i64,  8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }
  }
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}

int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
                                    unsigned Alignment, unsigned AddressSpace,
                                    const Instruction *I) {
  auto LT = TLI->getTypeLegalizationCost(DL, Ty);

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && Alignment < 16) {
    // Unaligned stores are extremely inefficient. We don't split all
    // unaligned 128-bit stores because of the negative impact that has been
    // observed in practice on inlined block copy code.
    // We make such stores expensive so that we will only vectorize if there
    // are 6 other instructions getting vectorized.
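    // E.g. (illustrative), a single unaligned <4 x i32> store (LT.first == 1)
    // is costed at 1 * 2 * 6 = 12.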
    const int AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;
  }

  if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8) &&
      Ty->getVectorNumElements() < 8) {
    // We scalarize the loads/stores because there is no v.4b register and we
    // have to promote the elements to v.4h.
    unsigned NumVecElts = Ty->getVectorNumElements();
    unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
    // We generate 2 instructions per vector element.
    return NumVectorizableInstsToAmortize * NumVecElts * 2;
  }

  return LT.first;
}

int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                               unsigned Factor,
                                               ArrayRef<unsigned> Indices,
                                               unsigned Alignment,
                                               unsigned AddressSpace) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned NumElts = VecTy->getVectorNumElements();
    auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);

    // ldN/stN only support legal vector types of size 64 or 128 in bits.
    // Accesses having vector types that are a multiple of 128 bits can be
    // matched to more than one ldN/stN instruction.
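    // E.g. (illustrative), a Factor == 2 access with VecTy <8 x i32> uses
    // SubVecTy <4 x i32> (128 bits, legal), so the returned cost is 2 * 1 = 2.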
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace);
}

int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
  int Cost = 0;
  for (auto *I : Tys) {
    if (!I->isVectorTy())
      continue;
    if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
      Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
              getMemoryOpCost(Instruction::Load, I, 128, 0);
  }
  return Cost;
}

unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
  return ST->getMaxInterleaveFactor();
}

void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                             TTI::UnrollingPreferences &UP) {
  // Enable partial unrolling and runtime unrolling.
  BaseT::getUnrollingPreferences(L, SE, UP);

  // An inner loop is more likely to be hot, and its runtime check can be
  // hoisted out by the LICM pass, so the overhead is lower; try a larger
  // threshold to unroll more inner loops.
  if (L->getLoopDepth() > 1)
    UP.PartialThreshold *= 2;

  // Disable partial & runtime unrolling on -Os.
  UP.PartialOptSizeThreshold = 0;
}

Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                         Type *ExpectedType) {
  switch (Inst->getIntrinsicID()) {
  default:
    return nullptr;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
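    // E.g. (illustrative), for a call st2(%a, %b, %ptr) and an ExpectedType of
    // { <4 x i32>, <4 x i32> }, rebuild the stored value as a struct holding
    // %a and %b so a following ldN can reuse it.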
    // Create a struct type
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    if (!ST)
      return nullptr;
    unsigned NumElts = Inst->getNumArgOperands() - 1;
    if (ST->getNumElements() != NumElts)
      return nullptr;
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
        return nullptr;
    }
    Value *Res = UndefValue::get(ExpectedType);
    IRBuilder<> Builder(Inst);
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      Value *L = Inst->getArgOperand(i);
      Res = Builder.CreateInsertValue(Res, L, i);
    }
    return Res;
  }
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
      return Inst;
    return nullptr;
  }
}

bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                        MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
    Info.PtrVal = Inst->getArgOperand(0);
    break;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;
    Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
    break;
  }

  switch (Inst->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    break;
  }
  return true;
}

/// See if \p I should be considered for address type promotion. We check if
/// \p I is a sext with the right type and used in memory accesses. If it is
/// used in a "complex" getelementptr, we allow it to be promoted without
/// finding other sext instructions that sign-extended the same initial value.
/// A getelementptr is considered "complex" if it has more than 2 operands.
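/// For example (illustrative), the sext in
///   %idx = sext i32 %i to i64
///   %p = getelementptr [16 x i32], [16 x i32]* %base, i64 %idx, i64 %j
/// feeds a GEP with more than two operands, so promotion is allowed without a
/// common header.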
bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
    return false;
  Type *ConsideredSExtType =
      Type::getInt64Ty(I.getParent()->getParent()->getContext());
  if (I.getType() != ConsideredSExtType)
    return false;
  // See if the sext is the one with the right type and used in at least one
  // GetElementPtrInst.
  for (const User *U : I.users()) {
    if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
      Considerable = true;
      // A getelementptr is considered as "complex" if it has more than 2
      // operands. We will promote a SExt used in such a complex GEP as we
      // expect some computation to be merged if they are done on 64 bits.
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
        break;
      }
    }
  }
  return Considerable;
}

unsigned AArch64TTIImpl::getCacheLineSize() {
  return ST->getCacheLineSize();
}

unsigned AArch64TTIImpl::getPrefetchDistance() {
  return ST->getPrefetchDistance();
}

unsigned AArch64TTIImpl::getMinPrefetchStride() {
  return ST->getMinPrefetchStride();
}

unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
  return ST->getMaxPrefetchIterationsAhead();
}

bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                           TTI::ReductionFlags Flags) const {
  assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
  unsigned ScalarBits = Ty->getScalarSizeInBits();
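  // E.g. (illustrative), an add reduction over <4 x i32> (4 * 32 == 128 bits)
  // uses the intrinsic, while one over <4 x i16> (64 bits) does not.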
  switch (Opcode) {
  case Instruction::FAdd:
  case Instruction::FMul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Mul:
    return false;
  case Instruction::Add:
    return ScalarBits * Ty->getVectorNumElements() >= 128;
  case Instruction::ICmp:
    return (ScalarBits < 64) &&
           (ScalarBits * Ty->getVectorNumElements() >= 128);
  case Instruction::FCmp:
    return Flags.NoNaN;
  default:
    llvm_unreachable("Unhandled reduction opcode");
  }
  return false;
}