blob: 8879630270e2cc0a6b5383e8d6f3688da8d6d406 [file] [log] [blame]
Hal Finkel4e5ca9e2013-01-25 23:05:59 +00001//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10/// This file implements a TargetTransformInfo analysis pass specific to the
11/// PPC target machine. It uses the target's detailed information to provide
12/// more precise answers to certain TTI queries, while letting the target
13/// independent and default TTI implementations handle the rest.
14///
15//===----------------------------------------------------------------------===//
16
17#define DEBUG_TYPE "ppctti"
18#include "PPC.h"
19#include "PPCTargetMachine.h"
20#include "llvm/Analysis/TargetTransformInfo.h"
21#include "llvm/Support/Debug.h"
22#include "llvm/Target/TargetLowering.h"
23#include "llvm/Target/CostTable.h"
24using namespace llvm;
25
// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
29namespace llvm {
30void initializePPCTTIPass(PassRegistry &);
31}
32
33namespace {
34
35class PPCTTI : public ImmutablePass, public TargetTransformInfo {
36 const PPCTargetMachine *TM;
37 const PPCSubtarget *ST;
38 const PPCTargetLowering *TLI;
39
40 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
41 /// are set if the result needs to be inserted and/or extracted from vectors.
42 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
43
44public:
45 PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
46 llvm_unreachable("This pass cannot be directly constructed");
47 }
48
49 PPCTTI(const PPCTargetMachine *TM)
50 : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
51 TLI(TM->getTargetLowering()) {
52 initializePPCTTIPass(*PassRegistry::getPassRegistry());
53 }
54
55 virtual void initializePass() {
56 pushTTIStack(this);
57 }
58
59 virtual void finalizePass() {
60 popTTIStack();
61 }
62
63 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
64 TargetTransformInfo::getAnalysisUsage(AU);
65 }
66
67 /// Pass identification.
68 static char ID;
69
70 /// Provide necessary pointer adjustments for the two base classes.
71 virtual void *getAdjustedAnalysisPointer(const void *ID) {
72 if (ID == &TargetTransformInfo::ID)
73 return (TargetTransformInfo*)this;
74 return this;
75 }
76
77 /// \name Scalar TTI Implementations
78 /// @{
79 virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
Hal Finkel71780ec2013-09-11 21:20:40 +000080 virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000081
82 /// @}
83
84 /// \name Vector TTI Implementations
85 /// @{
86
87 virtual unsigned getNumberOfRegisters(bool Vector) const;
88 virtual unsigned getRegisterBitWidth(bool Vector) const;
89 virtual unsigned getMaximumUnrollFactor() const;
Arnold Schwaighoferb9773872013-04-04 23:26:21 +000090 virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
91 OperandValueKind,
92 OperandValueKind) const;
Hal Finkel4e5ca9e2013-01-25 23:05:59 +000093 virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
94 int Index, Type *SubTp) const;
95 virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
96 Type *Src) const;
97 virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
98 Type *CondTy) const;
99 virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
100 unsigned Index) const;
101 virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
102 unsigned Alignment,
103 unsigned AddressSpace) const;
104
105 /// @}
106};
107
108} // end anonymous namespace
109
110INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
111 "PPC Target Transform Info", true, true, false)
112char PPCTTI::ID = 0;
113
114ImmutablePass *
115llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
116 return new PPCTTI(TM);
117}
118
119
120//===----------------------------------------------------------------------===//
121//
122// PPC cost model.
123//
124//===----------------------------------------------------------------------===//
125
126PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
127 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
Hal Finkela4d07482013-03-28 13:29:47 +0000128 if (ST->hasPOPCNTD() && TyWidth <= 64)
129 return PSK_FastHardware;
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000130 return PSK_Software;
131}
132
Hal Finkel71780ec2013-09-11 21:20:40 +0000133void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
134 if (ST->getDarwinDirective() == PPC::DIR_A2) {
135 // The A2 is in-order with a deep pipeline, and concatenation unrolling
136 // helps expose latency-hiding opportunities to the instruction scheduler.
137 UP.Partial = UP.Runtime = true;
138 }
139}
140
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000141unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
142 if (Vector && !ST->hasAltivec())
143 return 0;
144 return 32;
145}
146
147unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
148 if (Vector) {
149 if (ST->hasAltivec()) return 128;
150 return 0;
151 }
152
153 if (ST->isPPC64())
154 return 64;
155 return 32;
156
157}
158
159unsigned PPCTTI::getMaximumUnrollFactor() const {
160 unsigned Directive = ST->getDarwinDirective();
161 // The 440 has no SIMD support, but floating-point instructions
162 // have a 5-cycle latency, so unroll by 5x for latency hiding.
163 if (Directive == PPC::DIR_440)
164 return 5;
165
166 // The A2 has no SIMD support, but floating-point instructions
167 // have a 6-cycle latency, so unroll by 6x for latency hiding.
168 if (Directive == PPC::DIR_A2)
169 return 6;
170
171 // FIXME: For lack of any better information, do no harm...
172 if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
173 return 1;
174
175 // For most things, modern systems have two execution units (and
176 // out-of-order execution).
177 return 2;
178}
179
Arnold Schwaighoferb9773872013-04-04 23:26:21 +0000180unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
181 OperandValueKind Op1Info,
182 OperandValueKind Op2Info) const {
Dmitri Gribenkoc451bdf2013-01-25 23:17:21 +0000183 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000184
185 // Fallback to the default implementation.
Arnold Schwaighoferb9773872013-04-04 23:26:21 +0000186 return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
187 Op2Info);
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000188}
189
190unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
191 Type *SubTp) const {
192 return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
193}
194
195unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
Dmitri Gribenkoc451bdf2013-01-25 23:17:21 +0000196 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000197
198 return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
199}
200
201unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
202 Type *CondTy) const {
203 return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
204}
205
206unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
207 unsigned Index) const {
208 assert(Val->isVectorTy() && "This must be a vector type");
209
Bill Schmidt62fe7a5b2013-02-08 18:19:17 +0000210 int ISD = TLI->InstructionOpcodeToISD(Opcode);
211 assert(ISD && "Invalid opcode");
Bill Schmidtb3cece12013-02-07 20:33:57 +0000212
Bill Schmidt62fe7a5b2013-02-08 18:19:17 +0000213 // Estimated cost of a load-hit-store delay. This was obtained
214 // experimentally as a minimum needed to prevent unprofitable
215 // vectorization for the paq8p benchmark. It may need to be
216 // raised further if other unprofitable cases remain.
217 unsigned LHSPenalty = 12;
Bill Schmidtb3cece12013-02-07 20:33:57 +0000218
Bill Schmidt62fe7a5b2013-02-08 18:19:17 +0000219 // Vector element insert/extract with Altivec is very expensive,
220 // because they require store and reload with the attendant
221 // processor stall for load-hit-store. Until VSX is available,
222 // these need to be estimated as very costly.
223 if (ISD == ISD::EXTRACT_VECTOR_ELT ||
224 ISD == ISD::INSERT_VECTOR_ELT)
225 return LHSPenalty +
226 TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
Bill Schmidtb3cece12013-02-07 20:33:57 +0000227
Hal Finkel4e5ca9e2013-01-25 23:05:59 +0000228 return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
229}
230
231unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
232 unsigned AddressSpace) const {
233 // Legalize the type.
234 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
235 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
236 "Invalid Opcode");
237
238 // Each load/store unit costs 1.
239 unsigned Cost = LT.first * 1;
240
241 // PPC in general does not support unaligned loads and stores. They'll need
242 // to be decomposed based on the alignment factor.
243 unsigned SrcBytes = LT.second.getStoreSize();
244 if (SrcBytes && Alignment && Alignment < SrcBytes)
245 Cost *= (SrcBytes/Alignment);
246
247 return Cost;
248}
249