blob: aee298998f3a512a391f50561fa36a9bbdfce84e [file] [log] [blame]
Chandler Carruth93dcdc42015-01-31 11:17:59 +00001//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
Tim Northover3b0846e2014-05-24 12:50:23 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
Tim Northover3b0846e2014-05-24 12:50:23 +00009
Chandler Carruth93dcdc42015-01-31 11:17:59 +000010#include "AArch64TargetTransformInfo.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000011#include "MCTargetDesc/AArch64AddressingModes.h"
12#include "llvm/Analysis/TargetTransformInfo.h"
Kevin Qinaef68412015-03-09 06:14:28 +000013#include "llvm/Analysis/LoopInfo.h"
Chandler Carruth705b1852015-01-31 03:43:40 +000014#include "llvm/CodeGen/BasicTTIImpl.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000015#include "llvm/Support/Debug.h"
16#include "llvm/Target/CostTable.h"
17#include "llvm/Target/TargetLowering.h"
18#include <algorithm>
19using namespace llvm;
20
21#define DEBUG_TYPE "aarch64tti"
22
// How far ahead, measured in instructions, software prefetches are issued
// for Cyclone. Tunable via -cyclone-prefetch-distance.
static cl::opt<unsigned> CyclonePrefetchDistance(
    "cyclone-prefetch-distance",
    cl::desc("Number of instructions to prefetch ahead for Cyclone"),
    cl::init(280), cl::Hidden);

// The HW prefetcher handles accesses with strides up to 2KB.
// Tunable via -cyclone-min-prefetch-stride.
static cl::opt<unsigned> CycloneMinPrefetchStride(
    "cyclone-min-prefetch-stride",
    cl::desc("Min stride to add prefetches for Cyclone"),
    cl::init(2048), cl::Hidden);
33
Tim Northover3b0846e2014-05-24 12:50:23 +000034/// \brief Calculate the cost of materializing a 64-bit value. This helper
35/// method might only calculate a fraction of a larger immediate. Therefore it
36/// is valid to return a cost of ZERO.
Chandler Carruth93205eb2015-08-05 18:08:10 +000037int AArch64TTIImpl::getIntImmCost(int64_t Val) {
Tim Northover3b0846e2014-05-24 12:50:23 +000038 // Check if the immediate can be encoded within an instruction.
39 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
40 return 0;
41
42 if (Val < 0)
43 Val = ~Val;
44
45 // Calculate how many moves we will need to materialize this constant.
46 unsigned LZ = countLeadingZeros((uint64_t)Val);
47 return (64 - LZ + 15) / 16;
48}
49
/// \brief Calculate the cost of materializing the given constant.
int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // NOTE(review): ~0U converts to -1 through the int return type —
  // presumably intended as a "huge" cost for zero-width types; confirm
  // callers handle it that way.
  if (BitSize == 0)
    return ~0U;

  // Sign-extend all constants to a multiple of 64-bit.
  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  // Split the constant into 64-bit chunks and calculate the cost for each
  // chunk.
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
    int64_t Val = Tmp.getSExtValue();
    Cost += getIntImmCost(Val);
  }
  // We need at least one instruction to materialize the constant.
  return std::max(1, Cost);
}
74
/// \brief Calculate the cost of materializing \p Imm when it appears as
/// operand \p Idx of an instruction with opcode \p Opcode. Returns TCC_Free
/// for immediates that can be folded into the instruction (or that constant
/// hoisting should otherwise leave alone).
int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  // ImmIdx selects which operand position of this opcode can take an
  // immediate; all other positions are reported as free below.
  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::Store:
    // Only the stored value (operand 0) is an immediate candidate.
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    // Binary ops take their immediate on the right-hand side.
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TTI::TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    // Treat immediates that are cheap to materialize (no more than one
    // instruction per 64-bit chunk) as free so they are not hoisted.
    int NumConstants = (BitSize + 63) / 64;
    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
    return (Cost <= NumConstants * TTI::TCC_Basic)
               ? static_cast<int>(TTI::TCC_Free)
               : Cost;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
140
/// \brief Calculate the cost of materializing \p Imm when it appears as
/// operand \p Idx of intrinsic \p IID. Returns TCC_Free for operands where
/// the immediate is encodable or should not be hoisted.
int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      // Treat cheap immediates (at most one instruction per 64-bit chunk)
      // as free so they are not hoisted.
      int NumConstants = (BitSize + 63) / 64;
      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
      return (Cost <= NumConstants * TTI::TCC_Basic)
                 ? static_cast<int>(TTI::TCC_Free)
                 : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    // The first two operands, and any immediate that fits in a signed
    // 64-bit value, are free.
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    // The first four operands, and any immediate that fits in a signed
    // 64-bit value, are free.
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
180
Chandler Carruth705b1852015-01-31 03:43:40 +0000181TargetTransformInfo::PopcntSupportKind
182AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000183 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
184 if (TyWidth == 32 || TyWidth == 64)
Chandler Carruth705b1852015-01-31 03:43:40 +0000185 return TTI::PSK_FastHardware;
Tim Northover3b0846e2014-05-24 12:50:23 +0000186 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
Chandler Carruth705b1852015-01-31 03:43:40 +0000187 return TTI::PSK_Software;
Tim Northover3b0846e2014-05-24 12:50:23 +0000188}
189
/// \brief Return the cost of a cast from \p Src to \p Dst, using a table of
/// conversions the AArch64 backend lowers specially and deferring everything
/// else to the generic implementation.
int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  // Non-simple (extended) value types have no table entries; use the
  // generic model.
  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  // Each entry is { ISD opcode, destination MVT, source MVT, cost in
  // instructions }.
  static const TypeConversionCostTblEntry
  ConversionTbl[] = {
    { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
    { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
    { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
    { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },

    // The number of shll instructions for the extension.
    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
    { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
    { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },

    // LowerVectorINT_TO_FP:
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },

    // Complex: to v2f32
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },

    // Complex: to v4f32
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },

    // Complex: to v8f32
    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },

    // Complex: to v16f32
    { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
    { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },

    // Complex: to v2f64
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },


    // LowerVectorFP_TO_INT
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },

    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },

    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
    { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },

    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
  };

  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
                                                 DstTy.getSimpleVT(),
                                                 SrcTy.getSimpleVT()))
    return Entry->Cost;

  // No special entry: defer to the generic cost model.
  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
304
Chandler Carruth93205eb2015-08-05 18:08:10 +0000305int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
306 unsigned Index) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000307 assert(Val->isVectorTy() && "This must be a vector type");
308
309 if (Index != -1U) {
310 // Legalize the type.
Chandler Carruth93205eb2015-08-05 18:08:10 +0000311 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
Tim Northover3b0846e2014-05-24 12:50:23 +0000312
313 // This type is legalized to a scalar type.
314 if (!LT.second.isVector())
315 return 0;
316
317 // The type may be split. Normalize the index to the new type.
318 unsigned Width = LT.second.getVectorNumElements();
319 Index = Index % Width;
320
321 // The element at index zero is already inside the vector.
322 if (Index == 0)
323 return 0;
324 }
325
326 // All other insert/extracts cost this much.
Matthew Simpson921ad012016-02-18 18:35:45 +0000327 if (ST->isKryo())
328 return 2;
Silviu Barangab322aa62015-08-17 16:05:09 +0000329 return 3;
Tim Northover3b0846e2014-05-24 12:50:23 +0000330}
331
/// \brief Return the cost of an arithmetic instruction on \p Ty, accounting
/// for the special expansion of signed division by a power-of-two constant.
int AArch64TTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (ISD == ISD::SDIV &&
      Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
      Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
    // On AArch64, scalar signed division by constants power-of-two are
    // normally expanded to the sequence ADD + CMP + SELECT + SRA (here the
    // CMP is costed as a Sub and the SRA as an AShr).
    // The OperandValue properties may not be the same as that of the previous
    // operation; conservatively assume OP_None.
    int Cost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
                                      TargetTransformInfo::OP_None,
                                      TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    return Cost;
  }

  switch (ISD) {
  default:
    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                         Opd1PropInfo, Opd2PropInfo);
  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // These nodes are marked as 'custom' for combining purposes only.
    // We know that they are legal. See LowerAdd in ISelLowering.
    return 1 * LT.first;
  }
}
377
Chandler Carruth93205eb2015-08-05 18:08:10 +0000378int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000379 // Address computations in vectorized code with non-consecutive addresses will
380 // likely result in more instructions compared to scalar code where the
381 // computation can more often be merged into the index mode. The resulting
382 // extra micro-ops can significantly decrease throughput.
383 unsigned NumVectorInstToHideOverhead = 10;
384
385 if (Ty->isVectorTy() && IsComplex)
386 return NumVectorInstToHideOverhead;
387
388 // In many cases the address computation is not merged into the instruction
389 // addressing mode.
390 return 1;
391}
392
/// \brief Return the cost of a compare or select on \p ValTy, using a table
/// of vector selects that lower poorly on AArch64.
int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                       Type *CondTy) {

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // We don't lower some vector selects well that are wider than the register
  // width.
  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // We would need this many instructions to hide the scalarization happening.
    const int AmortizationCost = 20;
    // Each entry is { ISD::SELECT, condition MVT, value MVT, cost }.
    static const TypeConversionCostTblEntry
    VectorSelectTbl[] = {
      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
      { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }
  }
  // Everything else is handled by the generic model.
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
423
/// \brief Return the cost of a load or store of type \p Src with the given
/// alignment, penalizing patterns that lower poorly on AArch64.
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                    unsigned Alignment, unsigned AddressSpace) {
  // LT.first is the number of legal-type pieces the access splits into.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);

  if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
      Src->getVectorElementType()->isIntegerTy(64)) {
    // Unaligned stores are extremely inefficient. We don't split
    // unaligned v2i64 stores because the negative impact that has shown in
    // practice on inlined memcpy code.
    // We make v2i64 stores expensive so that we will only vectorize if there
    // are 6 other instructions getting vectorized.
    int AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;
  }

  if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
      Src->getVectorNumElements() < 8) {
    // We scalarize the loads/stores because there is not v.4b register and we
    // have to promote the elements to v.4h.
    unsigned NumVecElts = Src->getVectorNumElements();
    unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
    // We generate 2 instructions per vector element.
    return NumVectorizableInstsToAmortize * NumVecElts * 2;
  }

  // Otherwise: one memory operation per legalized piece.
  return LT.first;
}
James Molloy2b8933c2014-08-05 12:30:34 +0000452
Chandler Carruth93205eb2015-08-05 18:08:10 +0000453int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
454 unsigned Factor,
455 ArrayRef<unsigned> Indices,
456 unsigned Alignment,
457 unsigned AddressSpace) {
Hao Liu7ec8ee32015-06-26 02:32:07 +0000458 assert(Factor >= 2 && "Invalid interleave factor");
459 assert(isa<VectorType>(VecTy) && "Expect a vector type");
460
461 if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
462 unsigned NumElts = VecTy->getVectorNumElements();
463 Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
Ahmed Bougacha97564c32015-12-09 01:19:50 +0000464 unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
Hao Liu7ec8ee32015-06-26 02:32:07 +0000465
466 // ldN/stN only support legal vector types of size 64 or 128 in bits.
467 if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
468 return Factor;
469 }
470
471 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
472 Alignment, AddressSpace);
473}
474
Chandler Carruth93205eb2015-08-05 18:08:10 +0000475int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
476 int Cost = 0;
James Molloy2b8933c2014-08-05 12:30:34 +0000477 for (auto *I : Tys) {
478 if (!I->isVectorTy())
479 continue;
480 if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
481 Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
482 getMemoryOpCost(Instruction::Load, I, 128, 0);
483 }
484 return Cost;
485}
James Molloya88896b2014-08-21 00:02:51 +0000486
Wei Mi062c7442015-05-06 17:12:25 +0000487unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
Chad Rosiercd2be7f2016-02-12 15:51:51 +0000488 if (ST->isCortexA57() || ST->isKryo())
James Molloya88896b2014-08-21 00:02:51 +0000489 return 4;
490 return 2;
491}
Kevin Qin72a799a2014-10-09 10:13:27 +0000492
Chandler Carruthab5cb362015-02-01 14:31:23 +0000493void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
Chandler Carruth705b1852015-01-31 03:43:40 +0000494 TTI::UnrollingPreferences &UP) {
Kevin Qinaef68412015-03-09 06:14:28 +0000495 // Enable partial unrolling and runtime unrolling.
496 BaseT::getUnrollingPreferences(L, UP);
497
498 // For inner loop, it is more likely to be a hot one, and the runtime check
499 // can be promoted out from LICM pass, so the overhead is less, let's try
500 // a larger threshold to unroll more loops.
501 if (L->getLoopDepth() > 1)
502 UP.PartialThreshold *= 2;
503
Kevin Qin72a799a2014-10-09 10:13:27 +0000504 // Disable partial & runtime unrolling on -Os.
505 UP.PartialOptSizeThreshold = 0;
506}
Chad Rosierf9327d62015-01-26 22:51:15 +0000507
/// \brief Given a NEON structured-memory intrinsic, return (or rebuild) a
/// value of \p ExpectedType that represents its memory contents, or nullptr
/// if the intrinsic's operands don't match the expected shape.
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                         Type *ExpectedType) {
  switch (Inst->getIntrinsicID()) {
  default:
    return nullptr;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    // Create a struct type
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    if (!ST)
      return nullptr;
    // The last argument is the pointer; everything before it is a stored
    // vector value.
    unsigned NumElts = Inst->getNumArgOperands() - 1;
    if (ST->getNumElements() != NumElts)
      return nullptr;
    // Every stored value must match the corresponding struct field type.
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
        return nullptr;
    }
    // Rebuild the stored tuple by inserting each stored vector into an
    // undef aggregate of the expected type.
    Value *Res = UndefValue::get(ExpectedType);
    IRBuilder<> Builder(Inst);
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      Value *L = Inst->getArgOperand(i);
      Res = Builder.CreateInsertValue(Res, L, i);
    }
    return Res;
  }
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    // An ldN result is reusable directly when its type already matches.
    if (Inst->getType() == ExpectedType)
      return Inst;
    return nullptr;
  }
}
543
/// \brief Describe a NEON structured load/store intrinsic for the generic
/// memory-intrinsic framework. Fills \p Info and returns true for ldN/stN
/// intrinsics; returns false for everything else.
bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                        MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    // ldN reads memory through its first operand.
    Info.ReadMem = true;
    Info.WriteMem = false;
    Info.IsSimple = true;
    Info.NumMemRefs = 1;
    Info.PtrVal = Inst->getArgOperand(0);
    break;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    // stN writes memory through its last operand.
    Info.ReadMem = false;
    Info.WriteMem = true;
    Info.IsSimple = true;
    Info.NumMemRefs = 1;
    Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
    break;
  }

  // MatchingId pairs each ldN with the stN of the same element count so
  // equivalent accesses can be matched against each other.
  switch (Inst->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    break;
  }
  return true;
}
Adam Nemet53e758f2016-03-18 00:27:29 +0000587
588unsigned AArch64TTIImpl::getCacheLineSize() {
589 if (ST->isCyclone())
590 return 64;
591 return BaseT::getCacheLineSize();
592}
593
594unsigned AArch64TTIImpl::getPrefetchDistance() {
595 if (ST->isCyclone())
596 return CyclonePrefetchDistance;
597 return BaseT::getPrefetchDistance();
598}
Adam Nemet6d8beec2016-03-18 00:27:38 +0000599
600unsigned AArch64TTIImpl::getMinPrefetchStride() {
601 if (ST->isCyclone())
602 return CycloneMinPrefetchStride;
603 return BaseT::getMinPrefetchStride();
604}