//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "aarch64tti"

static cl::opt<unsigned> CyclonePrefetchDistance(
    "cyclone-prefetch-distance",
    cl::desc("Number of instructions to prefetch ahead for Cyclone"),
    cl::init(280), cl::Hidden);

// The HW prefetcher handles accesses with strides up to 2KB.
static cl::opt<unsigned> CycloneMinPrefetchStride(
    "cyclone-min-prefetch-stride",
    cl::desc("Min stride to add prefetches for Cyclone"),
    cl::init(2048), cl::Hidden);

// Be conservative for now and don't prefetch ahead too much since the loop
// may terminate early.
static cl::opt<unsigned> CycloneMaxPrefetchIterationsAhead(
    "cyclone-max-prefetch-iters-ahead",
    cl::desc("Max number of iterations to prefetch ahead on Cyclone"),
    cl::init(3), cl::Hidden);
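
// These three knobs are surfaced through the getPrefetchDistance,
// getMinPrefetchStride and getMaxPrefetchIterationsAhead overrides at the
// bottom of this file, which the software-prefetch insertion pass
// (LoopDataPrefetch) queries via TTI.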

/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
int AArch64TTIImpl::getIntImmCost(int64_t Val) {
  // Check if the immediate can be encoded within an instruction.
  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
    return 0;

  if (Val < 0)
    Val = ~Val;

  // Calculate how many moves we will need to materialize this constant.
  unsigned LZ = countLeadingZeros((uint64_t)Val);
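  // Each MOVZ/MOVK instruction materializes one 16-bit chunk of the value, so
  // the cost is the number of 16-bit chunks spanned by the significant bits.
  // For example, Val = 0x000123456789ABCD has 15 leading zeros and costs
  // (64 - 15 + 15) / 16 = 4 moves (one MOVZ plus three MOVKs).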
  return (64 - LZ + 15) / 16;
}

/// \brief Calculate the cost of materializing the given constant.
int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  // Sign-extend all constants to a multiple of 64 bits.
  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
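  // For example, an i20 constant is widened to i64 and an i96 constant to
  // i128 before being split into 64-bit chunks below.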

  // Split the constant into 64-bit chunks and calculate the cost for each
  // chunk.
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
    int64_t Val = Tmp.getSExtValue();
    Cost += getIntImmCost(Val);
  }
  // We need at least one instruction to materialize the constant.
  return std::max(1, Cost);
}

int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::Store:
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TTI::TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    int NumConstants = (BitSize + 63) / 64;
    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
    return (Cost <= NumConstants * TTI::TCC_Basic)
               ? static_cast<int>(TTI::TCC_Free)
               : Cost;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}

int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      int NumConstants = (BitSize + 63) / 64;
      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
      return (Cost <= NumConstants * TTI::TCC_Basic)
                 ? static_cast<int>(TTI::TCC_Free)
                 : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}

TargetTransformInfo::PopcntSupportKind
AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (TyWidth == 32 || TyWidth == 64)
    return TTI::PSK_FastHardware;
  // TODO: AArch64TargetLowering::LowerCTPOP() supports 128-bit popcount.
  return TTI::PSK_Software;
}

int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  static const TypeConversionCostTblEntry
  ConversionTbl[] = {
    { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
    { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
    { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
    { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },

    // The number of shll instructions for the extension.
    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
    { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
    { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },

    // LowerVectorINT_TO_FP:
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },

    // Complex: to v2f32
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },

    // Complex: to v4f32
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },

    // Complex: to v8f32
    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },

    // Complex: to v16f32
    { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
    { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },

    // Complex: to v2f64
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },

    // LowerVectorFP_TO_INT
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },

    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },

    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
    { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },

    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
  };

  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
                                                 DstTy.getSimpleVT(),
                                                 SrcTy.getSimpleVT()))
    return Entry->Cost;

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                       unsigned Index) {
  assert(Val->isVectorTy() && "This must be a vector type");

  if (Index != -1U) {
    // Legalize the type.
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
      return 0;

    // The type may be split. Normalize the index to the new type.
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;
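    // For example, extracting element 5 from a v8i32, which is split into two
    // v4i32 registers, becomes extracting element 1 from one of the halves.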

    // The element at index zero is already inside the vector.
    if (Index == 0)
      return 0;
  }

  // All other inserts/extracts cost this much.
  if (ST->isKryo())
    return 2;
  return 3;
}

int AArch64TTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (ISD == ISD::SDIV &&
      Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
      Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
    // On AArch64, scalar signed division by a power-of-two constant is
    // normally expanded to the sequence ADD + CMP + SELECT + SRA.
    // The OperandValue properties may not be the same as those of the
    // previous operation; conservatively assume OP_None.
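    // For example, "sdiv i32 %x, 4" is expanded to something like:
    //   add  w8, w0, #3
    //   cmp  w0, #0
    //   csel w8, w8, w0, lt
    //   asr  w0, w8, #2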
    int Cost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
                                      TargetTransformInfo::OP_None,
                                      TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    return Cost;
  }

  switch (ISD) {
  default:
    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                         Opd1PropInfo, Opd2PropInfo);
  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // These nodes are marked as 'custom' for combining purposes only.
    // We know that they are legal. See LowerAdd in ISelLowering.
    return 1 * LT.first;
  }
}

int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
  // Address computations in vectorized code with non-consecutive addresses
  // will likely result in more instructions compared to scalar code where the
  // computation can more often be merged into the index mode. The resulting
  // extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;

  if (Ty->isVectorTy() && IsComplex)
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}

int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                       Type *CondTy) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // We don't lower vector selects well when they are wider than the register
  // width.
  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // We would need this many instructions to hide the scalarization happening.
    const int AmortizationCost = 20;
    static const TypeConversionCostTblEntry
    VectorSelectTbl[] = {
      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
      { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }
  }
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}

int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                    unsigned Alignment, unsigned AddressSpace) {
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);

  if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
      Src->getVectorElementType()->isIntegerTy(64)) {
    // Unaligned stores are extremely inefficient. We don't split unaligned
    // v2i64 stores because of the negative impact that has been observed in
    // practice on inlined memcpy code.
    // We make v2i64 stores expensive so that we will only vectorize if there
    // are 6 other instructions getting vectorized.
    int AmortizationCost = 6;
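    // That is, each legalized unaligned v2i64 store is priced at
    // 2 * AmortizationCost = 12, versus LT.first for the aligned case below.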

    return LT.first * 2 * AmortizationCost;
  }

  if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
      Src->getVectorNumElements() < 8) {
    // We scalarize the loads/stores because there is no v.4b register and we
    // have to promote the elements to v.4h.
    unsigned NumVecElts = Src->getVectorNumElements();
    unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
    // We generate 2 instructions per vector element.
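    // For example, a v4i8 access is priced at (4 * 2) * 4 * 2 = 64, which
    // strongly discourages vectorizing these narrow i8 vectors.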
    return NumVectorizableInstsToAmortize * NumVecElts * 2;
  }

  return LT.first;
}

int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                               unsigned Factor,
                                               ArrayRef<unsigned> Indices,
                                               unsigned Alignment,
                                               unsigned AddressSpace) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned NumElts = VecTy->getVectorNumElements();
    Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
    unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);

    // ldN/stN only support legal vector types of size 64 or 128 in bits.
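    // For example, an ld2 of a v8i16 deinterleaves into two 64-bit v4i16
    // subvectors, so it is costed at Factor == 2.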
    if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
      return Factor;
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace);
}

int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
  int Cost = 0;
  for (auto *I : Tys) {
    if (!I->isVectorTy())
      continue;
    if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
      Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
              getMemoryOpCost(Instruction::Load, I, 128, 0);
  }
  return Cost;
}

unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
  if (ST->isCortexA57() || ST->isKryo())
    return 4;
  return 2;
}

void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
                                             TTI::UnrollingPreferences &UP) {
  // Enable partial unrolling and runtime unrolling.
  BaseT::getUnrollingPreferences(L, UP);

  // An inner loop is more likely to be hot, and its runtime checks can often
  // be promoted out by LICM, making the overhead smaller, so try a larger
  // threshold to unroll more inner loops.
  if (L->getLoopDepth() > 1)
    UP.PartialThreshold *= 2;

  // Disable partial & runtime unrolling on -Os.
  UP.PartialOptSizeThreshold = 0;
}

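// Try to recover, from a call to one of the NEON structured load/store
// intrinsics handled below, a value of the type a plain load of the same
// memory would produce. This is what lets passes such as EarlyCSE forward
// the values stored by an stN to a later matching ldN.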
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                         Type *ExpectedType) {
  switch (Inst->getIntrinsicID()) {
  default:
    return nullptr;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    // Check that the expected struct type matches the values being stored.
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    if (!ST)
      return nullptr;
    unsigned NumElts = Inst->getNumArgOperands() - 1;
    if (ST->getNumElements() != NumElts)
      return nullptr;
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
        return nullptr;
    }
    Value *Res = UndefValue::get(ExpectedType);
    IRBuilder<> Builder(Inst);
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      Value *L = Inst->getArgOperand(i);
      Res = Builder.CreateInsertValue(Res, L, i);
    }
    return Res;
  }
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
      return Inst;
    return nullptr;
  }
}

bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                        MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
    Info.IsSimple = true;
    Info.NumMemRefs = 1;
    Info.PtrVal = Inst->getArgOperand(0);
    break;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;
    Info.IsSimple = true;
    Info.NumMemRefs = 1;
    Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
    break;
  }

  switch (Inst->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    break;
  }
  return true;
}

unsigned AArch64TTIImpl::getCacheLineSize() {
  if (ST->isCyclone())
    return 64;
  return BaseT::getCacheLineSize();
}

unsigned AArch64TTIImpl::getPrefetchDistance() {
  if (ST->isCyclone())
    return CyclonePrefetchDistance;
  return BaseT::getPrefetchDistance();
}

unsigned AArch64TTIImpl::getMinPrefetchStride() {
  if (ST->isCyclone())
    return CycloneMinPrefetchStride;
  return BaseT::getMinPrefetchStride();
}

unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
  if (ST->isCyclone())
    return CycloneMaxPrefetchIterationsAhead;
  return BaseT::getMaxPrefetchIterationsAhead();
}