blob: 23a75cf39df644506a89332d57788f01aabdf03e [file] [log] [blame]
Hal Finkel4e5ca9e2013-01-25 23:05:59 +00001//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10/// This file implements a TargetTransformInfo analysis pass specific to the
11/// PPC target machine. It uses the target's detailed information to provide
12/// more precise answers to certain TTI queries, while letting the target
13/// independent and default TTI implementations handle the rest.
14///
15//===----------------------------------------------------------------------===//
16
17#define DEBUG_TYPE "ppctti"
18#include "PPC.h"
19#include "PPCTargetMachine.h"
20#include "llvm/Analysis/TargetTransformInfo.h"
21#include "llvm/Support/Debug.h"
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000022#include "llvm/Target/CostTable.h"
Chandler Carruth8a8cd2b2014-01-07 11:48:04 +000023#include "llvm/Target/TargetLowering.h"
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000024using namespace llvm;
25
// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
29namespace llvm {
30void initializePPCTTIPass(PassRegistry &);
31}
32
33namespace {
34
Craig Topper77dfe452014-03-02 08:08:51 +000035class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000036 const PPCSubtarget *ST;
37 const PPCTargetLowering *TLI;
38
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000039public:
Hal Finkel41e9b1d2014-04-05 00:16:28 +000040 PPCTTI() : ImmutablePass(ID), ST(0), TLI(0) {
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000041 llvm_unreachable("This pass cannot be directly constructed");
42 }
43
44 PPCTTI(const PPCTargetMachine *TM)
Hal Finkel41e9b1d2014-04-05 00:16:28 +000045 : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000046 TLI(TM->getTargetLowering()) {
47 initializePPCTTIPass(*PassRegistry::getPassRegistry());
48 }
49
Craig Topper73156022014-03-02 09:09:27 +000050 virtual void initializePass() override {
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000051 pushTTIStack(this);
52 }
53
Craig Topper73156022014-03-02 09:09:27 +000054 virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000055 TargetTransformInfo::getAnalysisUsage(AU);
56 }
57
58 /// Pass identification.
59 static char ID;
60
61 /// Provide necessary pointer adjustments for the two base classes.
Craig Topper73156022014-03-02 09:09:27 +000062 virtual void *getAdjustedAnalysisPointer(const void *ID) override {
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000063 if (ID == &TargetTransformInfo::ID)
64 return (TargetTransformInfo*)this;
65 return this;
66 }
67
68 /// \name Scalar TTI Implementations
69 /// @{
Juergen Ributzka3e752e72014-01-24 18:22:59 +000070 virtual PopcntSupportKind
Craig Topper73156022014-03-02 09:09:27 +000071 getPopcntSupport(unsigned TyWidth) const override;
Juergen Ributzka3e752e72014-01-24 18:22:59 +000072 virtual void getUnrollingPreferences(
Craig Topper73156022014-03-02 09:09:27 +000073 Loop *L, UnrollingPreferences &UP) const override;
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000074
75 /// @}
76
77 /// \name Vector TTI Implementations
78 /// @{
79
Craig Topper73156022014-03-02 09:09:27 +000080 virtual unsigned getNumberOfRegisters(bool Vector) const override;
81 virtual unsigned getRegisterBitWidth(bool Vector) const override;
82 virtual unsigned getMaximumUnrollFactor() const override;
Arnold Schwaighoferb9773872013-04-04 23:26:21 +000083 virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
84 OperandValueKind,
Craig Topper73156022014-03-02 09:09:27 +000085 OperandValueKind) const override;
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000086 virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
Craig Topper73156022014-03-02 09:09:27 +000087 int Index, Type *SubTp) const override;
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000088 virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
Craig Topper73156022014-03-02 09:09:27 +000089 Type *Src) const override;
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000090 virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Craig Topper73156022014-03-02 09:09:27 +000091 Type *CondTy) const override;
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000092 virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
Craig Topper73156022014-03-02 09:09:27 +000093 unsigned Index) const override;
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000094 virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
95 unsigned Alignment,
Craig Topper73156022014-03-02 09:09:27 +000096 unsigned AddressSpace) const override;
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000097
98 /// @}
99};
100
101} // end anonymous namespace
102
103INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
104 "PPC Target Transform Info", true, true, false)
105char PPCTTI::ID = 0;
106
107ImmutablePass *
108llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
109 return new PPCTTI(TM);
110}
111
112
113//===----------------------------------------------------------------------===//
114//
115// PPC cost model.
116//
117//===----------------------------------------------------------------------===//
118
119PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
120 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
Hal Finkela4d07482013-03-28 13:29:47 +0000121 if (ST->hasPOPCNTD() && TyWidth <= 64)
122 return PSK_FastHardware;
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000123 return PSK_Software;
124}
125
Hal Finkel71780ec2013-09-11 21:20:40 +0000126void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
127 if (ST->getDarwinDirective() == PPC::DIR_A2) {
128 // The A2 is in-order with a deep pipeline, and concatenation unrolling
129 // helps expose latency-hiding opportunities to the instruction scheduler.
130 UP.Partial = UP.Runtime = true;
131 }
132}
133
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000134unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
135 if (Vector && !ST->hasAltivec())
136 return 0;
Hal Finkel27774d92014-03-13 07:58:58 +0000137 return ST->hasVSX() ? 64 : 32;
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000138}
139
140unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
141 if (Vector) {
142 if (ST->hasAltivec()) return 128;
143 return 0;
144 }
145
146 if (ST->isPPC64())
147 return 64;
148 return 32;
149
150}
151
152unsigned PPCTTI::getMaximumUnrollFactor() const {
153 unsigned Directive = ST->getDarwinDirective();
154 // The 440 has no SIMD support, but floating-point instructions
155 // have a 5-cycle latency, so unroll by 5x for latency hiding.
156 if (Directive == PPC::DIR_440)
157 return 5;
158
159 // The A2 has no SIMD support, but floating-point instructions
160 // have a 6-cycle latency, so unroll by 6x for latency hiding.
161 if (Directive == PPC::DIR_A2)
162 return 6;
163
164 // FIXME: For lack of any better information, do no harm...
165 if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
166 return 1;
167
168 // For most things, modern systems have two execution units (and
169 // out-of-order execution).
170 return 2;
171}
172
Arnold Schwaighoferb9773872013-04-04 23:26:21 +0000173unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
174 OperandValueKind Op1Info,
175 OperandValueKind Op2Info) const {
Dmitri Gribenkoc451bdf2013-01-25 23:17:21 +0000176 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000177
178 // Fallback to the default implementation.
Arnold Schwaighoferb9773872013-04-04 23:26:21 +0000179 return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
180 Op2Info);
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000181}
182
183unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
184 Type *SubTp) const {
185 return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
186}
187
188unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
Dmitri Gribenkoc451bdf2013-01-25 23:17:21 +0000189 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000190
191 return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
192}
193
194unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
195 Type *CondTy) const {
196 return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
197}
198
199unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
200 unsigned Index) const {
201 assert(Val->isVectorTy() && "This must be a vector type");
202
Bill Schmidt62fe7a5b2013-02-08 18:19:17 +0000203 int ISD = TLI->InstructionOpcodeToISD(Opcode);
204 assert(ISD && "Invalid opcode");
Bill Schmidtb3cece12013-02-07 20:33:57 +0000205
Hal Finkel27774d92014-03-13 07:58:58 +0000206 if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
207 // Double-precision scalars are already located in index #0.
208 if (Index == 0)
209 return 0;
210
211 return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
212 }
213
Bill Schmidt62fe7a5b2013-02-08 18:19:17 +0000214 // Estimated cost of a load-hit-store delay. This was obtained
215 // experimentally as a minimum needed to prevent unprofitable
216 // vectorization for the paq8p benchmark. It may need to be
217 // raised further if other unprofitable cases remain.
Hal Finkelde0b4132014-04-04 23:51:18 +0000218 unsigned LHSPenalty = 2;
219 if (ISD == ISD::INSERT_VECTOR_ELT)
220 LHSPenalty += 7;
Bill Schmidtb3cece12013-02-07 20:33:57 +0000221
Bill Schmidt62fe7a5b2013-02-08 18:19:17 +0000222 // Vector element insert/extract with Altivec is very expensive,
223 // because they require store and reload with the attendant
224 // processor stall for load-hit-store. Until VSX is available,
225 // these need to be estimated as very costly.
226 if (ISD == ISD::EXTRACT_VECTOR_ELT ||
227 ISD == ISD::INSERT_VECTOR_ELT)
228 return LHSPenalty +
229 TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
Bill Schmidtb3cece12013-02-07 20:33:57 +0000230
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000231 return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
232}
233
234unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
235 unsigned AddressSpace) const {
236 // Legalize the type.
237 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
238 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
239 "Invalid Opcode");
240
Hal Finkelf8233802014-04-02 22:43:49 +0000241 unsigned Cost =
242 TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000243
Hal Finkelde0b4132014-04-04 23:51:18 +0000244 // VSX loads/stores support unaligned access.
245 if (ST->hasVSX()) {
246 if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
247 return Cost;
248 }
249
250 bool UnalignedAltivec =
251 Src->isVectorTy() &&
252 Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
253 LT.second.getSizeInBits() == 128 &&
254 Opcode == Instruction::Load;
Hal Finkel6e28e6a2014-03-26 19:39:09 +0000255
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000256 // PPC in general does not support unaligned loads and stores. They'll need
257 // to be decomposed based on the alignment factor.
258 unsigned SrcBytes = LT.second.getStoreSize();
Hal Finkelde0b4132014-04-04 23:51:18 +0000259 if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
Hal Finkelf8233802014-04-02 22:43:49 +0000260 Cost += LT.first*(SrcBytes/Alignment-1);
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000261
Hal Finkelde0b4132014-04-04 23:51:18 +0000262 // For a vector type, there is also scalarization overhead (only for
263 // stores, loads are expanded using the vector-load + permutation sequence,
264 // which is much less expensive).
265 if (Src->isVectorTy() && Opcode == Instruction::Store)
266 for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
267 Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
268 }
269
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000270 return Cost;
271}
272