blob: 5803e07dbcf656e5f91797c7be92cad30ae6cacf [file] [log] [blame]
//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
Tim Northover3b0846e2014-05-24 12:50:23 +00009
Chandler Carruth93dcdc42015-01-31 11:17:59 +000010#include "AArch64TargetTransformInfo.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000011#include "MCTargetDesc/AArch64AddressingModes.h"
12#include "llvm/Analysis/TargetTransformInfo.h"
Kevin Qinaef68412015-03-09 06:14:28 +000013#include "llvm/Analysis/LoopInfo.h"
Chandler Carruth705b1852015-01-31 03:43:40 +000014#include "llvm/CodeGen/BasicTTIImpl.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000015#include "llvm/Support/Debug.h"
16#include "llvm/Target/CostTable.h"
17#include "llvm/Target/TargetLowering.h"
18#include <algorithm>
19using namespace llvm;
20
21#define DEBUG_TYPE "aarch64tti"
22
Adam Nemet53e758f2016-03-18 00:27:29 +000023static cl::opt<unsigned> CyclonePrefetchDistance(
24 "cyclone-prefetch-distance",
25 cl::desc("Number of instructions to prefetch ahead for Cyclone"),
26 cl::init(280), cl::Hidden);
27
Tim Northover3b0846e2014-05-24 12:50:23 +000028/// \brief Calculate the cost of materializing a 64-bit value. This helper
29/// method might only calculate a fraction of a larger immediate. Therefore it
30/// is valid to return a cost of ZERO.
Chandler Carruth93205eb2015-08-05 18:08:10 +000031int AArch64TTIImpl::getIntImmCost(int64_t Val) {
Tim Northover3b0846e2014-05-24 12:50:23 +000032 // Check if the immediate can be encoded within an instruction.
33 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
34 return 0;
35
36 if (Val < 0)
37 Val = ~Val;
38
39 // Calculate how many moves we will need to materialize this constant.
40 unsigned LZ = countLeadingZeros((uint64_t)Val);
41 return (64 - LZ + 15) / 16;
42}
43
44/// \brief Calculate the cost of materializing the given constant.
Chandler Carruth93205eb2015-08-05 18:08:10 +000045int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
Tim Northover3b0846e2014-05-24 12:50:23 +000046 assert(Ty->isIntegerTy());
47
48 unsigned BitSize = Ty->getPrimitiveSizeInBits();
49 if (BitSize == 0)
50 return ~0U;
51
52 // Sign-extend all constants to a multiple of 64-bit.
53 APInt ImmVal = Imm;
54 if (BitSize & 0x3f)
55 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
56
57 // Split the constant into 64-bit chunks and calculate the cost for each
58 // chunk.
Chandler Carruth93205eb2015-08-05 18:08:10 +000059 int Cost = 0;
Tim Northover3b0846e2014-05-24 12:50:23 +000060 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
61 APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
62 int64_t Val = Tmp.getSExtValue();
63 Cost += getIntImmCost(Val);
64 }
65 // We need at least one instruction to materialze the constant.
Chandler Carruth93205eb2015-08-05 18:08:10 +000066 return std::max(1, Cost);
Tim Northover3b0846e2014-05-24 12:50:23 +000067}
68
/// \brief Cost of materializing the constant \p Imm when it appears as
/// operand \p Idx of instruction \p Opcode, for constant hoisting.
/// TTI::TCC_Free signals that the immediate should be left in place (it can
/// live in the instruction); any higher cost makes it a hoisting candidate.
int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  // ImmIdx is the single operand position that gets the real cost comparison
  // below; ~0U means no position qualifies for this opcode.
  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::Store:
    // Only the stored value (operand 0) is a candidate.
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    // Binary ops: the right-hand operand is the immediate position.
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TTI::TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    // Fall through to the generic materialization cost below.
    break;
  }

  if (Idx == ImmIdx) {
    // Cheap immediates (at most one basic op per 64-bit chunk) stay in place.
    int NumConstants = (BitSize + 63) / 64;
    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
    return (Cost <= NumConstants * TTI::TCC_Basic)
               ? static_cast<int>(TTI::TCC_Free)
               : Cost;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
134
/// \brief Cost of materializing the constant \p Imm when it appears as
/// argument \p Idx of intrinsic \p IID, for constant hoisting.
/// TTI::TCC_Free means the immediate should not be hoisted.
int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    // Only the second argument is an immediate candidate; cheap ones
    // (at most one basic op per 64-bit chunk) stay in place.
    if (Idx == 1) {
      int NumConstants = (BitSize + 63) / 64;
      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
      return (Cost <= NumConstants * TTI::TCC_Basic)
                 ? static_cast<int>(TTI::TCC_Free)
                 : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    // ID/shadow-bytes args (Idx < 2) and any 64-bit-encodable immediate
    // are free.
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    // The first four args are patchpoint metadata; those and any
    // 64-bit-encodable immediate are free.
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
174
Chandler Carruth705b1852015-01-31 03:43:40 +0000175TargetTransformInfo::PopcntSupportKind
176AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000177 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
178 if (TyWidth == 32 || TyWidth == 64)
Chandler Carruth705b1852015-01-31 03:43:40 +0000179 return TTI::PSK_FastHardware;
Tim Northover3b0846e2014-05-24 12:50:23 +0000180 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
Chandler Carruth705b1852015-01-31 03:43:40 +0000181 return TTI::PSK_Software;
Tim Northover3b0846e2014-05-24 12:50:23 +0000182}
183
/// \brief Cost of a conversion instruction \p Opcode from \p Src to \p Dst.
/// Looks up known AArch64 vector-conversion costs in a table and falls back
/// to the generic implementation for anything not listed.
int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  // The table below only covers simple (MVT) types.
  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  static const TypeConversionCostTblEntry
  ConversionTbl[] = {
    { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32,  1 },
    { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64,  0 },
    { ISD::TRUNCATE, MVT::v8i8,  MVT::v8i32,  3 },
    { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },

    // The number of shll instructions for the extension.
    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
    { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
    { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i32, 2 },
    { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i32, 2 },
    { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i8,  3 },
    { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i8,  3 },
    { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i16, 2 },
    { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i16, 2 },
    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i8,  7 },
    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i8,  7 },
    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i16, 6 },
    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i16, 6 },
    { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
    { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },

    // LowerVectorINT_TO_FP:
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },

    // Complex: to v2f32
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8,  3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8,  3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },

    // Complex: to v4f32
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8,  4 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8,  3 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },

    // Complex: to v8f32
    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8,  10 },
    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8,  10 },
    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },

    // Complex: to v16f32
    { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
    { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },

    // Complex: to v2f64
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8,  4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8,  4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },


    // LowerVectorFP_TO_INT
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },

    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i8,  MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f32, 1 },

    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
    { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_SINT, MVT::v4i8,  MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i8,  MVT::v4f32, 2 },

    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i8,  MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f64, 2 },
  };

  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
                                                 DstTy.getSimpleVT(),
                                                 SrcTy.getSimpleVT()))
    return Entry->Cost;

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
298
Chandler Carruth93205eb2015-08-05 18:08:10 +0000299int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
300 unsigned Index) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000301 assert(Val->isVectorTy() && "This must be a vector type");
302
303 if (Index != -1U) {
304 // Legalize the type.
Chandler Carruth93205eb2015-08-05 18:08:10 +0000305 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
Tim Northover3b0846e2014-05-24 12:50:23 +0000306
307 // This type is legalized to a scalar type.
308 if (!LT.second.isVector())
309 return 0;
310
311 // The type may be split. Normalize the index to the new type.
312 unsigned Width = LT.second.getVectorNumElements();
313 Index = Index % Width;
314
315 // The element at index zero is already inside the vector.
316 if (Index == 0)
317 return 0;
318 }
319
320 // All other insert/extracts cost this much.
Matthew Simpson921ad012016-02-18 18:35:45 +0000321 if (ST->isKryo())
322 return 2;
Silviu Barangab322aa62015-08-17 16:05:09 +0000323 return 3;
Tim Northover3b0846e2014-05-24 12:50:23 +0000324}
325
/// \brief Cost of an arithmetic instruction \p Opcode on type \p Ty, taking
/// operand kind/property hints into account. Signed division by a uniform
/// power-of-two constant is costed as its expansion; simple integer ops cost
/// one per legalized part; everything else defers to the base implementation.
int AArch64TTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (ISD == ISD::SDIV &&
      Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
      Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
    // On AArch64, scalar signed division by constants power-of-two are
    // normally expanded to the sequence ADD + CMP + SELECT + SRA.
    // The OperandValue properties may not be the same as that of the previous
    // operation; conservatively assume OP_None.
    int Cost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
                                      TargetTransformInfo::OP_None,
                                      TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    return Cost;
  }

  switch (ISD) {
  default:
    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                         Opd1PropInfo, Opd2PropInfo);
  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // These nodes are marked as 'custom' for combining purposes only.
    // We know that they are legal. See LowerAdd in ISelLowering.
    return 1 * LT.first;
  }
}
371
Chandler Carruth93205eb2015-08-05 18:08:10 +0000372int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000373 // Address computations in vectorized code with non-consecutive addresses will
374 // likely result in more instructions compared to scalar code where the
375 // computation can more often be merged into the index mode. The resulting
376 // extra micro-ops can significantly decrease throughput.
377 unsigned NumVectorInstToHideOverhead = 10;
378
379 if (Ty->isVectorTy() && IsComplex)
380 return NumVectorInstToHideOverhead;
381
382 // In many cases the address computation is not merged into the instruction
383 // addressing mode.
384 return 1;
385}
386
/// \brief Cost of a compare or select of \p ValTy with condition type
/// \p CondTy. Wide vector selects that lower poorly get table-driven costs;
/// everything else defers to the base implementation.
int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                       Type *CondTy) {

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // We don't lower some vector selects well that are wider than the register
  // width.
  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // We would need this many instructions to hide the scalarization happening.
    const int AmortizationCost = 20;
    static const TypeConversionCostTblEntry
    VectorSelectTbl[] = {
      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i32,  8 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
      // i64-element selects are scalarized; amortize that heavily.
      { ISD::SELECT, MVT::v4i1,  MVT::v4i64,  4 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i64,  8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }
  }
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
417
Chandler Carruth93205eb2015-08-05 18:08:10 +0000418int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
419 unsigned Alignment, unsigned AddressSpace) {
420 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
Tim Northover3b0846e2014-05-24 12:50:23 +0000421
422 if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
423 Src->getVectorElementType()->isIntegerTy(64)) {
424 // Unaligned stores are extremely inefficient. We don't split
425 // unaligned v2i64 stores because the negative impact that has shown in
426 // practice on inlined memcpy code.
427 // We make v2i64 stores expensive so that we will only vectorize if there
428 // are 6 other instructions getting vectorized.
Chandler Carruth93205eb2015-08-05 18:08:10 +0000429 int AmortizationCost = 6;
Tim Northover3b0846e2014-05-24 12:50:23 +0000430
431 return LT.first * 2 * AmortizationCost;
432 }
433
434 if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
435 Src->getVectorNumElements() < 8) {
436 // We scalarize the loads/stores because there is not v.4b register and we
437 // have to promote the elements to v.4h.
438 unsigned NumVecElts = Src->getVectorNumElements();
439 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
440 // We generate 2 instructions per vector element.
441 return NumVectorizableInstsToAmortize * NumVecElts * 2;
442 }
443
444 return LT.first;
445}
James Molloy2b8933c2014-08-05 12:30:34 +0000446
Chandler Carruth93205eb2015-08-05 18:08:10 +0000447int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
448 unsigned Factor,
449 ArrayRef<unsigned> Indices,
450 unsigned Alignment,
451 unsigned AddressSpace) {
Hao Liu7ec8ee32015-06-26 02:32:07 +0000452 assert(Factor >= 2 && "Invalid interleave factor");
453 assert(isa<VectorType>(VecTy) && "Expect a vector type");
454
455 if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
456 unsigned NumElts = VecTy->getVectorNumElements();
457 Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
Ahmed Bougacha97564c32015-12-09 01:19:50 +0000458 unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
Hao Liu7ec8ee32015-06-26 02:32:07 +0000459
460 // ldN/stN only support legal vector types of size 64 or 128 in bits.
461 if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
462 return Factor;
463 }
464
465 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
466 Alignment, AddressSpace);
467}
468
Chandler Carruth93205eb2015-08-05 18:08:10 +0000469int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
470 int Cost = 0;
James Molloy2b8933c2014-08-05 12:30:34 +0000471 for (auto *I : Tys) {
472 if (!I->isVectorTy())
473 continue;
474 if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
475 Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
476 getMemoryOpCost(Instruction::Load, I, 128, 0);
477 }
478 return Cost;
479}
James Molloya88896b2014-08-21 00:02:51 +0000480
Wei Mi062c7442015-05-06 17:12:25 +0000481unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
Chad Rosiercd2be7f2016-02-12 15:51:51 +0000482 if (ST->isCortexA57() || ST->isKryo())
James Molloya88896b2014-08-21 00:02:51 +0000483 return 4;
484 return 2;
485}
Kevin Qin72a799a2014-10-09 10:13:27 +0000486
Chandler Carruthab5cb362015-02-01 14:31:23 +0000487void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
Chandler Carruth705b1852015-01-31 03:43:40 +0000488 TTI::UnrollingPreferences &UP) {
Kevin Qinaef68412015-03-09 06:14:28 +0000489 // Enable partial unrolling and runtime unrolling.
490 BaseT::getUnrollingPreferences(L, UP);
491
492 // For inner loop, it is more likely to be a hot one, and the runtime check
493 // can be promoted out from LICM pass, so the overhead is less, let's try
494 // a larger threshold to unroll more loops.
495 if (L->getLoopDepth() > 1)
496 UP.PartialThreshold *= 2;
497
Kevin Qin72a799a2014-10-09 10:13:27 +0000498 // Disable partial & runtime unrolling on -Os.
499 UP.PartialOptSizeThreshold = 0;
500}
Chad Rosierf9327d62015-01-26 22:51:15 +0000501
/// \brief Try to produce a value of \p ExpectedType from the NEON structured
/// load/store intrinsic \p Inst, or return null if it cannot be done.
/// For stN the stored operands are re-packed into a struct value; for ldN the
/// instruction itself is returned when its type already matches.
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                         Type *ExpectedType) {
  switch (Inst->getIntrinsicID()) {
  default:
    return nullptr;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    // Create a struct type
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    if (!ST)
      return nullptr;
    // The last argument is the pointer; the rest are the stored values.
    unsigned NumElts = Inst->getNumArgOperands() - 1;
    if (ST->getNumElements() != NumElts)
      return nullptr;
    // Every stored value must match the corresponding struct field type.
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
        return nullptr;
    }
    // Re-pack the stored values into a struct of the expected type.
    Value *Res = UndefValue::get(ExpectedType);
    IRBuilder<> Builder(Inst);
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      Value *L = Inst->getArgOperand(i);
      Res = Builder.CreateInsertValue(Res, L, i);
    }
    return Res;
  }
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    // A load already yields a struct; usable directly iff the type matches.
    if (Inst->getType() == ExpectedType)
      return Inst;
    return nullptr;
  }
}
537
/// \brief Describe the memory behavior of the NEON structured load/store
/// intrinsics. Returns true and fills \p Info for ld2/ld3/ld4 and
/// st2/st3/st4; returns false for everything else.
bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                        MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
    Info.IsSimple = true;
    Info.NumMemRefs = 1;
    // For the loads, the pointer is the first argument.
    Info.PtrVal = Inst->getArgOperand(0);
    break;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;
    Info.IsSimple = true;
    Info.NumMemRefs = 1;
    // For the stores, the pointer is the last argument (after the values).
    Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
    break;
  }

  // Give ldN and stN of the same element count a common MatchingId so a
  // matching load/store pair can be recognized.
  switch (Inst->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    break;
  }
  return true;
}
Adam Nemet53e758f2016-03-18 00:27:29 +0000581
582unsigned AArch64TTIImpl::getCacheLineSize() {
583 if (ST->isCyclone())
584 return 64;
585 return BaseT::getCacheLineSize();
586}
587
588unsigned AArch64TTIImpl::getPrefetchDistance() {
589 if (ST->isCyclone())
590 return CyclonePrefetchDistance;
591 return BaseT::getPrefetchDistance();
592}