Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 1 | //===- CostModel.cpp ------ Cost Model Analysis ---------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file defines the cost model analysis. It provides a very basic cost |
Nadav Rotem | 99868e4 | 2012-12-24 05:51:12 +0000 | [diff] [blame] | 11 | // estimation for LLVM-IR. This analysis uses the services of the codegen |
| 12 | // to approximate the cost of any IR instruction when lowered to machine |
| 13 | // instructions. The cost results are unit-less and the cost number represents |
| 14 | // the throughput of the machine assuming that all loads hit the cache, all |
| 15 | // branches are predicted, etc. The cost numbers can be added in order to |
| 16 | // compare two or more transformation alternatives. |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 17 | // |
| 18 | //===----------------------------------------------------------------------===// |
| 19 | |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 20 | #include "llvm/ADT/STLExtras.h" |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 21 | #include "llvm/Analysis/Passes.h" |
Chandler Carruth | d3e7355 | 2013-01-07 03:08:10 +0000 | [diff] [blame] | 22 | #include "llvm/Analysis/TargetTransformInfo.h" |
Michael Kuperstein | 3ceac2b | 2016-08-04 22:48:03 +0000 | [diff] [blame] | 23 | #include "llvm/Analysis/VectorUtils.h" |
Chandler Carruth | 9fb823b | 2013-01-02 11:36:10 +0000 | [diff] [blame] | 24 | #include "llvm/IR/Function.h" |
| 25 | #include "llvm/IR/Instructions.h" |
Benjamin Kramer | f7cfac7 | 2013-02-28 19:09:33 +0000 | [diff] [blame] | 26 | #include "llvm/IR/IntrinsicInst.h" |
Chandler Carruth | 9fb823b | 2013-01-02 11:36:10 +0000 | [diff] [blame] | 27 | #include "llvm/IR/Value.h" |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 28 | #include "llvm/Pass.h" |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 29 | #include "llvm/Support/CommandLine.h" |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 30 | #include "llvm/Support/Debug.h" |
| 31 | #include "llvm/Support/raw_ostream.h" |
| 32 | using namespace llvm; |
| 33 | |
Chandler Carruth | f1221bd | 2014-04-22 02:48:03 +0000 | [diff] [blame] | 34 | #define CM_NAME "cost-model" |
| 35 | #define DEBUG_TYPE CM_NAME |
| 36 | |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 37 | static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), |
| 38 | cl::Hidden, |
| 39 | cl::desc("Recognize reduction patterns.")); |
| 40 | |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 41 | namespace { |
| 42 | class CostModelAnalysis : public FunctionPass { |
| 43 | |
| 44 | public: |
| 45 | static char ID; // Class identification, replacement for typeinfo |
Craig Topper | 9f00886 | 2014-04-15 04:59:12 +0000 | [diff] [blame] | 46 | CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) { |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 47 | initializeCostModelAnalysisPass( |
| 48 | *PassRegistry::getPassRegistry()); |
| 49 | } |
| 50 | |
| 51 | /// Returns the expected cost of the instruction. |
| 52 | /// Returns -1 if the cost is unknown. |
| 53 | /// Note, this method does not cache the cost calculation and it |
| 54 | /// can be expensive in some cases. |
Nadav Rotem | ce5db0f | 2012-12-03 22:47:12 +0000 | [diff] [blame] | 55 | unsigned getInstructionCost(const Instruction *I) const; |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 56 | |
| 57 | private: |
Craig Topper | e9ba759 | 2014-03-05 07:30:04 +0000 | [diff] [blame] | 58 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
| 59 | bool runOnFunction(Function &F) override; |
| 60 | void print(raw_ostream &OS, const Module*) const override; |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 61 | |
| 62 | /// The function that we analyze. |
| 63 | Function *F; |
Chandler Carruth | cf569a8 | 2013-01-05 10:09:33 +0000 | [diff] [blame] | 64 | /// Target information. |
| 65 | const TargetTransformInfo *TTI; |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 66 | }; |
| 67 | } // End of anonymous namespace |
| 68 | |
| 69 | // Register this pass. |
| 70 | char CostModelAnalysis::ID = 0; |
| 71 | static const char cm_name[] = "Cost Model Analysis"; |
| 72 | INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true) |
| 73 | INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true) |
| 74 | |
| 75 | FunctionPass *llvm::createCostModelAnalysisPass() { |
| 76 | return new CostModelAnalysis(); |
| 77 | } |
| 78 | |
| 79 | void |
| 80 | CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { |
| 81 | AU.setPreservesAll(); |
| 82 | } |
| 83 | |
| 84 | bool |
| 85 | CostModelAnalysis::runOnFunction(Function &F) { |
| 86 | this->F = &F; |
Chandler Carruth | 705b185 | 2015-01-31 03:43:40 +0000 | [diff] [blame] | 87 | auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); |
Chandler Carruth | fdb9c57 | 2015-02-01 12:01:35 +0000 | [diff] [blame] | 88 | TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr; |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 89 | |
| 90 | return false; |
| 91 | } |
| 92 | |
Simon Pilgrim | c93cd30 | 2016-12-21 15:49:01 +0000 | [diff] [blame] | 93 | static bool isReverseVectorMask(ArrayRef<int> Mask) { |
Arnold Schwaighofer | 7e2ca6e | 2013-02-12 02:40:37 +0000 | [diff] [blame] | 94 | for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) |
Simon Pilgrim | 9876ed0 | 2016-12-15 12:12:45 +0000 | [diff] [blame] | 95 | if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i)) |
Arnold Schwaighofer | 7e2ca6e | 2013-02-12 02:40:37 +0000 | [diff] [blame] | 96 | return false; |
| 97 | return true; |
| 98 | } |
| 99 | |
Elena Demikhovsky | 21706cb | 2017-01-02 10:37:52 +0000 | [diff] [blame] | 100 | static bool isSingleSourceVectorMask(ArrayRef<int> Mask) { |
| 101 | bool Vec0 = false; |
| 102 | bool Vec1 = false; |
| 103 | for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) { |
| 104 | if (Mask[i] >= 0) { |
| 105 | if ((unsigned)Mask[i] >= NumVecElts) |
| 106 | Vec1 = true; |
| 107 | else |
| 108 | Vec0 = true; |
| 109 | } |
| 110 | } |
| 111 | return !(Vec0 && Vec1); |
| 112 | } |
| 113 | |
| 114 | static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) { |
| 115 | for (unsigned i = 0; i < Mask.size(); ++i) |
| 116 | if (Mask[i] > 0) |
| 117 | return false; |
| 118 | return true; |
| 119 | } |
| 120 | |
Simon Pilgrim | c93cd30 | 2016-12-21 15:49:01 +0000 | [diff] [blame] | 121 | static bool isAlternateVectorMask(ArrayRef<int> Mask) { |
Andrea Di Biagio | c8e8bda | 2014-07-03 22:24:18 +0000 | [diff] [blame] | 122 | bool isAlternate = true; |
| 123 | unsigned MaskSize = Mask.size(); |
| 124 | |
| 125 | // Example: shufflevector A, B, <0,5,2,7> |
| 126 | for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { |
| 127 | if (Mask[i] < 0) |
| 128 | continue; |
| 129 | isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); |
| 130 | } |
| 131 | |
| 132 | if (isAlternate) |
| 133 | return true; |
| 134 | |
| 135 | isAlternate = true; |
| 136 | // Example: shufflevector A, B, <4,1,6,3> |
| 137 | for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { |
| 138 | if (Mask[i] < 0) |
| 139 | continue; |
| 140 | isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); |
| 141 | } |
| 142 | |
| 143 | return isAlternate; |
| 144 | } |
| 145 | |
Arnold Schwaighofer | b977387 | 2013-04-04 23:26:21 +0000 | [diff] [blame] | 146 | static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { |
| 147 | TargetTransformInfo::OperandValueKind OpInfo = |
Michael Kuperstein | 3ceac2b | 2016-08-04 22:48:03 +0000 | [diff] [blame] | 148 | TargetTransformInfo::OK_AnyValue; |
Arnold Schwaighofer | b977387 | 2013-04-04 23:26:21 +0000 | [diff] [blame] | 149 | |
Andrea Di Biagio | b7882b3 | 2014-02-12 23:43:47 +0000 | [diff] [blame] | 150 | // Check for a splat of a constant or for a non uniform vector of constants. |
Benjamin Kramer | 989b929 | 2014-02-13 16:48:38 +0000 | [diff] [blame] | 151 | if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { |
Andrea Di Biagio | b7882b3 | 2014-02-12 23:43:47 +0000 | [diff] [blame] | 152 | OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; |
Craig Topper | 9f00886 | 2014-04-15 04:59:12 +0000 | [diff] [blame] | 153 | if (cast<Constant>(V)->getSplatValue() != nullptr) |
Arnold Schwaighofer | b977387 | 2013-04-04 23:26:21 +0000 | [diff] [blame] | 154 | OpInfo = TargetTransformInfo::OK_UniformConstantValue; |
Andrea Di Biagio | b7882b3 | 2014-02-12 23:43:47 +0000 | [diff] [blame] | 155 | } |
Arnold Schwaighofer | b977387 | 2013-04-04 23:26:21 +0000 | [diff] [blame] | 156 | |
Michael Kuperstein | 3ceac2b | 2016-08-04 22:48:03 +0000 | [diff] [blame] | 157 | // Check for a splat of a uniform value. This is not loop aware, so return |
| 158 | // true only for the obviously uniform cases (argument, globalvalue) |
| 159 | const Value *Splat = getSplatValue(V); |
| 160 | if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat))) |
| 161 | OpInfo = TargetTransformInfo::OK_UniformValue; |
| 162 | |
Arnold Schwaighofer | b977387 | 2013-04-04 23:26:21 +0000 | [diff] [blame] | 163 | return OpInfo; |
| 164 | } |
| 165 | |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 166 | static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, |
| 167 | unsigned Level) { |
| 168 | // We don't need a shuffle if we just want to have element 0 in position 0 of |
| 169 | // the vector. |
| 170 | if (!SI && Level == 0 && IsLeft) |
| 171 | return true; |
| 172 | else if (!SI) |
| 173 | return false; |
| 174 | |
| 175 | SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); |
| 176 | |
| 177 | // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether |
| 178 | // we look at the left or right side. |
| 179 | for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) |
| 180 | Mask[i] = val; |
| 181 | |
| 182 | SmallVector<int, 16> ActualMask = SI->getShuffleMask(); |
Alexander Kornienko | 484e48e3 | 2015-11-05 21:07:12 +0000 | [diff] [blame] | 183 | return Mask == ActualMask; |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 184 | } |
| 185 | |
| 186 | static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, |
| 187 | unsigned Level, unsigned NumLevels) { |
| 188 | // Match one level of pairwise operations. |
| 189 | // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, |
| 190 | // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> |
| 191 | // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, |
| 192 | // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> |
| 193 | // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 |
Craig Topper | 9f00886 | 2014-04-15 04:59:12 +0000 | [diff] [blame] | 194 | if (BinOp == nullptr) |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 195 | return false; |
| 196 | |
Eric Christopher | e7af7bd | 2013-09-17 21:13:57 +0000 | [diff] [blame] | 197 | assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 198 | |
| 199 | unsigned Opcode = BinOp->getOpcode(); |
| 200 | Value *L = BinOp->getOperand(0); |
| 201 | Value *R = BinOp->getOperand(1); |
| 202 | |
| 203 | ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L); |
| 204 | if (!LS && Level) |
| 205 | return false; |
| 206 | ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R); |
| 207 | if (!RS && Level) |
| 208 | return false; |
| 209 | |
| 210 | // On level 0 we can omit one shufflevector instruction. |
| 211 | if (!Level && !RS && !LS) |
| 212 | return false; |
| 213 | |
| 214 | // Shuffle inputs must match. |
Craig Topper | 9f00886 | 2014-04-15 04:59:12 +0000 | [diff] [blame] | 215 | Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; |
| 216 | Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; |
| 217 | Value *NextLevelOp = nullptr; |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 218 | if (NextLevelOpR && NextLevelOpL) { |
| 219 | // If we have two shuffles their operands must match. |
| 220 | if (NextLevelOpL != NextLevelOpR) |
| 221 | return false; |
| 222 | |
| 223 | NextLevelOp = NextLevelOpL; |
| 224 | } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { |
| 225 | // On the first level we can omit the shufflevector <0, undef,...>. So the |
| 226 | // input to the other shufflevector <1, undef> must match with one of the |
| 227 | // inputs to the current binary operation. |
| 228 | // Example: |
| 229 | // %NextLevelOpL = shufflevector %R, <1, undef ...> |
| 230 | // %BinOp = fadd %NextLevelOpL, %R |
| 231 | if (NextLevelOpL && NextLevelOpL != R) |
| 232 | return false; |
| 233 | else if (NextLevelOpR && NextLevelOpR != L) |
| 234 | return false; |
| 235 | |
| 236 | NextLevelOp = NextLevelOpL ? R : L; |
| 237 | } else |
| 238 | return false; |
| 239 | |
| 240 | // Check that the next levels binary operation exists and matches with the |
| 241 | // current one. |
Craig Topper | 9f00886 | 2014-04-15 04:59:12 +0000 | [diff] [blame] | 242 | BinaryOperator *NextLevelBinOp = nullptr; |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 243 | if (Level + 1 != NumLevels) { |
| 244 | if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp))) |
| 245 | return false; |
| 246 | else if (NextLevelBinOp->getOpcode() != Opcode) |
| 247 | return false; |
| 248 | } |
| 249 | |
| 250 | // Shuffle mask for pairwise operation must match. |
| 251 | if (matchPairwiseShuffleMask(LS, true, Level)) { |
| 252 | if (!matchPairwiseShuffleMask(RS, false, Level)) |
| 253 | return false; |
| 254 | } else if (matchPairwiseShuffleMask(RS, true, Level)) { |
| 255 | if (!matchPairwiseShuffleMask(LS, false, Level)) |
| 256 | return false; |
| 257 | } else |
| 258 | return false; |
| 259 | |
| 260 | if (++Level == NumLevels) |
| 261 | return true; |
| 262 | |
| 263 | // Match next level. |
| 264 | return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels); |
| 265 | } |
| 266 | |
| 267 | static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot, |
| 268 | unsigned &Opcode, Type *&Ty) { |
| 269 | if (!EnableReduxCost) |
| 270 | return false; |
| 271 | |
| 272 | // Need to extract the first element. |
| 273 | ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); |
| 274 | unsigned Idx = ~0u; |
| 275 | if (CI) |
| 276 | Idx = CI->getZExtValue(); |
| 277 | if (Idx != 0) |
| 278 | return false; |
| 279 | |
| 280 | BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); |
| 281 | if (!RdxStart) |
| 282 | return false; |
| 283 | |
| 284 | Type *VecTy = ReduxRoot->getOperand(0)->getType(); |
| 285 | unsigned NumVecElems = VecTy->getVectorNumElements(); |
| 286 | if (!isPowerOf2_32(NumVecElems)) |
| 287 | return false; |
| 288 | |
| 289 | // We look for a sequence of shuffle,shuffle,add triples like the following |
| 290 | // that builds a pairwise reduction tree. |
| 291 | // |
| 292 | // (X0, X1, X2, X3) |
| 293 | // (X0 + X1, X2 + X3, undef, undef) |
| 294 | // ((X0 + X1) + (X2 + X3), undef, undef, undef) |
| 295 | // |
| 296 | // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, |
| 297 | // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> |
| 298 | // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, |
| 299 | // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> |
| 300 | // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 |
| 301 | // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, |
| 302 | // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> |
| 303 | // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, |
| 304 | // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> |
| 305 | // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 |
| 306 | // %r = extractelement <4 x float> %bin.rdx8, i32 0 |
| 307 | if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems))) |
| 308 | return false; |
| 309 | |
| 310 | Opcode = RdxStart->getOpcode(); |
| 311 | Ty = VecTy; |
| 312 | |
| 313 | return true; |
| 314 | } |
| 315 | |
| 316 | static std::pair<Value *, ShuffleVectorInst *> |
| 317 | getShuffleAndOtherOprd(BinaryOperator *B) { |
| 318 | |
| 319 | Value *L = B->getOperand(0); |
| 320 | Value *R = B->getOperand(1); |
Craig Topper | 9f00886 | 2014-04-15 04:59:12 +0000 | [diff] [blame] | 321 | ShuffleVectorInst *S = nullptr; |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 322 | |
| 323 | if ((S = dyn_cast<ShuffleVectorInst>(L))) |
| 324 | return std::make_pair(R, S); |
| 325 | |
| 326 | S = dyn_cast<ShuffleVectorInst>(R); |
| 327 | return std::make_pair(L, S); |
| 328 | } |
| 329 | |
| 330 | static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, |
| 331 | unsigned &Opcode, Type *&Ty) { |
| 332 | if (!EnableReduxCost) |
| 333 | return false; |
| 334 | |
| 335 | // Need to extract the first element. |
| 336 | ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); |
| 337 | unsigned Idx = ~0u; |
| 338 | if (CI) |
| 339 | Idx = CI->getZExtValue(); |
| 340 | if (Idx != 0) |
| 341 | return false; |
| 342 | |
| 343 | BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); |
| 344 | if (!RdxStart) |
| 345 | return false; |
| 346 | unsigned RdxOpcode = RdxStart->getOpcode(); |
| 347 | |
| 348 | Type *VecTy = ReduxRoot->getOperand(0)->getType(); |
| 349 | unsigned NumVecElems = VecTy->getVectorNumElements(); |
| 350 | if (!isPowerOf2_32(NumVecElems)) |
| 351 | return false; |
| 352 | |
| 353 | // We look for a sequence of shuffles and adds like the following matching one |
| 354 | // fadd, shuffle vector pair at a time. |
| 355 | // |
| 356 | // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, |
| 357 | // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> |
| 358 | // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf |
| 359 | // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, |
| 360 | // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> |
| 361 | // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 |
| 362 | // %r = extractelement <4 x float> %bin.rdx8, i32 0 |
| 363 | |
| 364 | unsigned MaskStart = 1; |
| 365 | Value *RdxOp = RdxStart; |
| 366 | SmallVector<int, 32> ShuffleMask(NumVecElems, 0); |
| 367 | unsigned NumVecElemsRemain = NumVecElems; |
| 368 | while (NumVecElemsRemain - 1) { |
| 369 | // Check for the right reduction operation. |
| 370 | BinaryOperator *BinOp; |
| 371 | if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp))) |
| 372 | return false; |
| 373 | if (BinOp->getOpcode() != RdxOpcode) |
| 374 | return false; |
| 375 | |
| 376 | Value *NextRdxOp; |
| 377 | ShuffleVectorInst *Shuffle; |
Benjamin Kramer | d6f1f84 | 2014-03-02 13:30:33 +0000 | [diff] [blame] | 378 | std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 379 | |
| 380 | // Check the current reduction operation and the shuffle use the same value. |
Craig Topper | 9f00886 | 2014-04-15 04:59:12 +0000 | [diff] [blame] | 381 | if (Shuffle == nullptr) |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 382 | return false; |
| 383 | if (Shuffle->getOperand(0) != NextRdxOp) |
| 384 | return false; |
| 385 | |
| 386 | // Check that shuffle masks matches. |
| 387 | for (unsigned j = 0; j != MaskStart; ++j) |
| 388 | ShuffleMask[j] = MaskStart + j; |
| 389 | // Fill the rest of the mask with -1 for undef. |
| 390 | std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); |
| 391 | |
| 392 | SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); |
Benjamin Kramer | 147644d | 2014-04-18 19:48:03 +0000 | [diff] [blame] | 393 | if (ShuffleMask != Mask) |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 394 | return false; |
| 395 | |
| 396 | RdxOp = NextRdxOp; |
| 397 | NumVecElemsRemain /= 2; |
| 398 | MaskStart *= 2; |
| 399 | } |
| 400 | |
| 401 | Opcode = RdxOpcode; |
| 402 | Ty = VecTy; |
| 403 | return true; |
| 404 | } |
| 405 | |
Nadav Rotem | ce5db0f | 2012-12-03 22:47:12 +0000 | [diff] [blame] | 406 | unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { |
Chandler Carruth | cf569a8 | 2013-01-05 10:09:33 +0000 | [diff] [blame] | 407 | if (!TTI) |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 408 | return -1; |
| 409 | |
| 410 | switch (I->getOpcode()) { |
Jingyue Wu | bfefff5 | 2015-07-26 19:10:03 +0000 | [diff] [blame] | 411 | case Instruction::GetElementPtr: |
| 412 | return TTI->getUserCost(I); |
Arnold Schwaighofer | 594fa2d | 2013-02-08 14:50:48 +0000 | [diff] [blame] | 413 | |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 414 | case Instruction::Ret: |
| 415 | case Instruction::PHI: |
| 416 | case Instruction::Br: { |
Chandler Carruth | cf569a8 | 2013-01-05 10:09:33 +0000 | [diff] [blame] | 417 | return TTI->getCFInstrCost(I->getOpcode()); |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 418 | } |
| 419 | case Instruction::Add: |
| 420 | case Instruction::FAdd: |
| 421 | case Instruction::Sub: |
| 422 | case Instruction::FSub: |
| 423 | case Instruction::Mul: |
| 424 | case Instruction::FMul: |
| 425 | case Instruction::UDiv: |
| 426 | case Instruction::SDiv: |
| 427 | case Instruction::FDiv: |
| 428 | case Instruction::URem: |
| 429 | case Instruction::SRem: |
| 430 | case Instruction::FRem: |
| 431 | case Instruction::Shl: |
| 432 | case Instruction::LShr: |
| 433 | case Instruction::AShr: |
| 434 | case Instruction::And: |
| 435 | case Instruction::Or: |
| 436 | case Instruction::Xor: { |
Arnold Schwaighofer | b977387 | 2013-04-04 23:26:21 +0000 | [diff] [blame] | 437 | TargetTransformInfo::OperandValueKind Op1VK = |
| 438 | getOperandInfo(I->getOperand(0)); |
| 439 | TargetTransformInfo::OperandValueKind Op2VK = |
| 440 | getOperandInfo(I->getOperand(1)); |
Mohammed Agabaria | 2c96c43 | 2017-01-11 08:23:37 +0000 | [diff] [blame] | 441 | SmallVector<const Value*, 2> Operands(I->operand_values()); |
Arnold Schwaighofer | b977387 | 2013-04-04 23:26:21 +0000 | [diff] [blame] | 442 | return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, |
Mohammed Agabaria | 2c96c43 | 2017-01-11 08:23:37 +0000 | [diff] [blame] | 443 | Op2VK, TargetTransformInfo::OP_None, |
| 444 | TargetTransformInfo::OP_None, |
| 445 | Operands); |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 446 | } |
| 447 | case Instruction::Select: { |
Nadav Rotem | ce5db0f | 2012-12-03 22:47:12 +0000 | [diff] [blame] | 448 | const SelectInst *SI = cast<SelectInst>(I); |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 449 | Type *CondTy = SI->getCondition()->getType(); |
Jonas Paulsson | fccc7d6 | 2017-04-12 11:49:08 +0000 | [diff] [blame] | 450 | return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I); |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 451 | } |
| 452 | case Instruction::ICmp: |
| 453 | case Instruction::FCmp: { |
| 454 | Type *ValTy = I->getOperand(0)->getType(); |
Jonas Paulsson | fccc7d6 | 2017-04-12 11:49:08 +0000 | [diff] [blame] | 455 | return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I); |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 456 | } |
| 457 | case Instruction::Store: { |
Nadav Rotem | ce5db0f | 2012-12-03 22:47:12 +0000 | [diff] [blame] | 458 | const StoreInst *SI = cast<StoreInst>(I); |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 459 | Type *ValTy = SI->getValueOperand()->getType(); |
Chandler Carruth | cf569a8 | 2013-01-05 10:09:33 +0000 | [diff] [blame] | 460 | return TTI->getMemoryOpCost(I->getOpcode(), ValTy, |
Jonas Paulsson | fccc7d6 | 2017-04-12 11:49:08 +0000 | [diff] [blame] | 461 | SI->getAlignment(), |
| 462 | SI->getPointerAddressSpace(), I); |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 463 | } |
| 464 | case Instruction::Load: { |
Nadav Rotem | ce5db0f | 2012-12-03 22:47:12 +0000 | [diff] [blame] | 465 | const LoadInst *LI = cast<LoadInst>(I); |
Chandler Carruth | cf569a8 | 2013-01-05 10:09:33 +0000 | [diff] [blame] | 466 | return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), |
Jonas Paulsson | fccc7d6 | 2017-04-12 11:49:08 +0000 | [diff] [blame] | 467 | LI->getAlignment(), |
| 468 | LI->getPointerAddressSpace(), I); |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 469 | } |
| 470 | case Instruction::ZExt: |
| 471 | case Instruction::SExt: |
| 472 | case Instruction::FPToUI: |
| 473 | case Instruction::FPToSI: |
| 474 | case Instruction::FPExt: |
| 475 | case Instruction::PtrToInt: |
| 476 | case Instruction::IntToPtr: |
| 477 | case Instruction::SIToFP: |
| 478 | case Instruction::UIToFP: |
| 479 | case Instruction::Trunc: |
| 480 | case Instruction::FPTrunc: |
Matt Arsenault | 339506d | 2014-01-22 20:30:16 +0000 | [diff] [blame] | 481 | case Instruction::BitCast: |
| 482 | case Instruction::AddrSpaceCast: { |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 483 | Type *SrcTy = I->getOperand(0)->getType(); |
Jonas Paulsson | fccc7d6 | 2017-04-12 11:49:08 +0000 | [diff] [blame] | 484 | return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I); |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 485 | } |
Nadav Rotem | 13da947 | 2012-11-02 22:31:56 +0000 | [diff] [blame] | 486 | case Instruction::ExtractElement: { |
Nadav Rotem | ce5db0f | 2012-12-03 22:47:12 +0000 | [diff] [blame] | 487 | const ExtractElementInst * EEI = cast<ExtractElementInst>(I); |
Nadav Rotem | 13da947 | 2012-11-02 22:31:56 +0000 | [diff] [blame] | 488 | ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); |
| 489 | unsigned Idx = -1; |
| 490 | if (CI) |
| 491 | Idx = CI->getZExtValue(); |
Arnold Schwaighofer | cae8735 | 2013-09-17 18:06:50 +0000 | [diff] [blame] | 492 | |
| 493 | // Try to match a reduction sequence (series of shufflevector and vector |
| 494 | // adds followed by a extractelement). |
| 495 | unsigned ReduxOpCode; |
| 496 | Type *ReduxType; |
| 497 | |
| 498 | if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) |
| 499 | return TTI->getReductionCost(ReduxOpCode, ReduxType, false); |
| 500 | else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) |
| 501 | return TTI->getReductionCost(ReduxOpCode, ReduxType, true); |
| 502 | |
Chandler Carruth | cf569a8 | 2013-01-05 10:09:33 +0000 | [diff] [blame] | 503 | return TTI->getVectorInstrCost(I->getOpcode(), |
| 504 | EEI->getOperand(0)->getType(), Idx); |
Nadav Rotem | 13da947 | 2012-11-02 22:31:56 +0000 | [diff] [blame] | 505 | } |
| 506 | case Instruction::InsertElement: { |
Craig Topper | 3703964 | 2013-07-11 05:39:44 +0000 | [diff] [blame] | 507 | const InsertElementInst * IE = cast<InsertElementInst>(I); |
| 508 | ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); |
| 509 | unsigned Idx = -1; |
| 510 | if (CI) |
| 511 | Idx = CI->getZExtValue(); |
| 512 | return TTI->getVectorInstrCost(I->getOpcode(), |
| 513 | IE->getType(), Idx); |
| 514 | } |
Arnold Schwaighofer | 7e2ca6e | 2013-02-12 02:40:37 +0000 | [diff] [blame] | 515 | case Instruction::ShuffleVector: { |
| 516 | const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); |
| 517 | Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); |
| 518 | unsigned NumVecElems = VecTypOp0->getVectorNumElements(); |
| 519 | SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); |
| 520 | |
Andrea Di Biagio | c8e8bda | 2014-07-03 22:24:18 +0000 | [diff] [blame] | 521 | if (NumVecElems == Mask.size()) { |
| 522 | if (isReverseVectorMask(Mask)) |
| 523 | return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, |
| 524 | 0, nullptr); |
| 525 | if (isAlternateVectorMask(Mask)) |
| 526 | return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, |
| 527 | VecTypOp0, 0, nullptr); |
Elena Demikhovsky | 21706cb | 2017-01-02 10:37:52 +0000 | [diff] [blame] | 528 | |
| 529 | if (isZeroEltBroadcastVectorMask(Mask)) |
| 530 | return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, |
| 531 | VecTypOp0, 0, nullptr); |
| 532 | |
| 533 | if (isSingleSourceVectorMask(Mask)) |
| 534 | return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, |
| 535 | VecTypOp0, 0, nullptr); |
| 536 | |
| 537 | return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, |
| 538 | VecTypOp0, 0, nullptr); |
Andrea Di Biagio | c8e8bda | 2014-07-03 22:24:18 +0000 | [diff] [blame] | 539 | } |
| 540 | |
Arnold Schwaighofer | 7e2ca6e | 2013-02-12 02:40:37 +0000 | [diff] [blame] | 541 | return -1; |
| 542 | } |
Benjamin Kramer | f7cfac7 | 2013-02-28 19:09:33 +0000 | [diff] [blame] | 543 | case Instruction::Call: |
| 544 | if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { |
Jonas Paulsson | a48ea23 | 2017-03-14 06:35:36 +0000 | [diff] [blame] | 545 | SmallVector<Value *, 4> Args(II->arg_operands()); |
Benjamin Kramer | f7cfac7 | 2013-02-28 19:09:33 +0000 | [diff] [blame] | 546 | |
David Majnemer | 0f26b0a | 2016-04-14 07:13:24 +0000 | [diff] [blame] | 547 | FastMathFlags FMF; |
| 548 | if (auto *FPMO = dyn_cast<FPMathOperator>(II)) |
| 549 | FMF = FPMO->getFastMathFlags(); |
| 550 | |
Benjamin Kramer | f7cfac7 | 2013-02-28 19:09:33 +0000 | [diff] [blame] | 551 | return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), |
David Majnemer | 0f26b0a | 2016-04-14 07:13:24 +0000 | [diff] [blame] | 552 | Args, FMF); |
Benjamin Kramer | f7cfac7 | 2013-02-28 19:09:33 +0000 | [diff] [blame] | 553 | } |
| 554 | return -1; |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 555 | default: |
| 556 | // We don't have any information on this instruction. |
| 557 | return -1; |
| 558 | } |
| 559 | } |
| 560 | |
| 561 | void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { |
| 562 | if (!F) |
| 563 | return; |
| 564 | |
Benjamin Kramer | aa20915 | 2016-06-26 17:27:42 +0000 | [diff] [blame] | 565 | for (BasicBlock &B : *F) { |
| 566 | for (Instruction &Inst : B) { |
| 567 | unsigned Cost = getInstructionCost(&Inst); |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 568 | if (Cost != (unsigned)-1) |
| 569 | OS << "Cost Model: Found an estimated cost of " << Cost; |
| 570 | else |
| 571 | OS << "Cost Model: Unknown cost"; |
| 572 | |
Benjamin Kramer | aa20915 | 2016-06-26 17:27:42 +0000 | [diff] [blame] | 573 | OS << " for instruction: " << Inst << "\n"; |
Nadav Rotem | a6b91ac | 2012-11-02 21:48:17 +0000 | [diff] [blame] | 574 | } |
| 575 | } |
| 576 | } |