Stanislav Mekhanoshin | ad04e7a | 2019-06-17 17:47:28 +0000 | [diff] [blame] | 1 | //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file |
| 10 | /// \brief This pass propagates attributes from kernels to the non-entry |
| 11 | /// functions. Most of the library functions were not compiled for specific ABI, |
| 12 | /// yet will be correctly compiled if proper attributes are propagated from the |
| 13 | /// caller. |
| 14 | /// |
| 15 | /// The pass analyzes call graph and propagates ABI target features through the |
| 16 | /// call graph. |
| 17 | /// |
| 18 | /// It can run in two modes: as a function or module pass. A function pass |
| 19 | /// simply propagates attributes. A module pass clones functions if there are |
| 20 | /// callers with different ABI. If a function is cloned all call sites will |
| 21 | /// be updated to use a correct clone. |
| 22 | /// |
| 23 | /// A function pass is limited in functionality but can run early in the |
| 24 | /// pipeline. A module pass is more powerful but has to run late, so misses |
| 25 | /// library folding opportunities. |
| 26 | // |
| 27 | //===----------------------------------------------------------------------===// |
| 28 | |
Stanislav Mekhanoshin | ad04e7a | 2019-06-17 17:47:28 +0000 | [diff] [blame] | 29 | #include "AMDGPU.h" |
| 30 | #include "AMDGPUSubtarget.h" |
| 31 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| 32 | #include "Utils/AMDGPUBaseInfo.h" |
| 33 | #include "llvm/ADT/SmallSet.h" |
| 34 | #include "llvm/ADT/SmallVector.h" |
| 35 | #include "llvm/IR/Function.h" |
| 36 | #include "llvm/IR/Module.h" |
| 37 | #include "llvm/Target/TargetMachine.h" |
| 38 | #include "llvm/Transforms/Utils/Cloning.h" |
| 39 | #include <string> |
| 40 | |
Matt Arsenault | 5e64303 | 2019-07-08 18:48:39 +0000 | [diff] [blame] | 41 | #define DEBUG_TYPE "amdgpu-propagate-attributes" |
| 42 | |
Stanislav Mekhanoshin | ad04e7a | 2019-06-17 17:47:28 +0000 | [diff] [blame] | 43 | using namespace llvm; |
| 44 | |
namespace llvm {
// Table of AMDGPU subtarget features, defined outside this file. In this file
// each entry's Key is used as the feature name and its Value as the bit index
// when a FeatureBitset is rendered as a "target-features" string.
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}
| 48 | |
| 49 | namespace { |
| 50 | |
| 51 | class AMDGPUPropagateAttributes { |
| 52 | const FeatureBitset TargetFeatures = { |
| 53 | AMDGPU::FeatureWavefrontSize16, |
| 54 | AMDGPU::FeatureWavefrontSize32, |
| 55 | AMDGPU::FeatureWavefrontSize64 |
| 56 | }; |
| 57 | |
| 58 | class Clone{ |
| 59 | public: |
| 60 | Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) : |
| 61 | FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {} |
| 62 | |
| 63 | FeatureBitset FeatureMask; |
| 64 | Function *OrigF; |
| 65 | Function *NewF; |
| 66 | }; |
| 67 | |
| 68 | const TargetMachine *TM; |
| 69 | |
| 70 | // Clone functions as needed or just set attributes. |
| 71 | bool AllowClone; |
| 72 | |
| 73 | // Option propagation roots. |
| 74 | SmallSet<Function *, 32> Roots; |
| 75 | |
| 76 | // Clones of functions with their attributes. |
| 77 | SmallVector<Clone, 32> Clones; |
| 78 | |
| 79 | // Find a clone with required features. |
| 80 | Function *findFunction(const FeatureBitset &FeaturesNeeded, |
| 81 | Function *OrigF); |
| 82 | |
| 83 | // Clone function F and set NewFeatures on the clone. |
| 84 | // Cole takes the name of original function. |
| 85 | Function *cloneWithFeatures(Function &F, |
| 86 | const FeatureBitset &NewFeatures); |
| 87 | |
| 88 | // Set new function's features in place. |
| 89 | void setFeatures(Function &F, const FeatureBitset &NewFeatures); |
| 90 | |
| 91 | std::string getFeatureString(const FeatureBitset &Features) const; |
| 92 | |
| 93 | // Propagate attributes from Roots. |
| 94 | bool process(); |
| 95 | |
| 96 | public: |
| 97 | AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : |
| 98 | TM(TM), AllowClone(AllowClone) {} |
| 99 | |
| 100 | // Use F as a root and propagate its attributes. |
| 101 | bool process(Function &F); |
| 102 | |
| 103 | // Propagate attributes starting from kernel functions. |
| 104 | bool process(Module &M); |
| 105 | }; |
| 106 | |
| 107 | // Allows to propagate attributes early, but no clonning is allowed as it must |
| 108 | // be a function pass to run before any optimizations. |
| 109 | // TODO: We shall only need a one instance of module pass, but that needs to be |
| 110 | // in the linker pipeline which is currently not possible. |
| 111 | class AMDGPUPropagateAttributesEarly : public FunctionPass { |
| 112 | const TargetMachine *TM; |
| 113 | |
| 114 | public: |
| 115 | static char ID; // Pass identification |
| 116 | |
| 117 | AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : |
| 118 | FunctionPass(ID), TM(TM) { |
| 119 | initializeAMDGPUPropagateAttributesEarlyPass( |
| 120 | *PassRegistry::getPassRegistry()); |
| 121 | } |
| 122 | |
| 123 | bool runOnFunction(Function &F) override; |
| 124 | }; |
| 125 | |
| 126 | // Allows to propagate attributes with clonning but does that late in the |
| 127 | // pipeline. |
| 128 | class AMDGPUPropagateAttributesLate : public ModulePass { |
| 129 | const TargetMachine *TM; |
| 130 | |
| 131 | public: |
| 132 | static char ID; // Pass identification |
| 133 | |
| 134 | AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : |
| 135 | ModulePass(ID), TM(TM) { |
| 136 | initializeAMDGPUPropagateAttributesLatePass( |
| 137 | *PassRegistry::getPassRegistry()); |
| 138 | } |
| 139 | |
| 140 | bool runOnModule(Module &M) override; |
| 141 | }; |
| 142 | |
| 143 | } // end anonymous namespace. |
| 144 | |
// Out-of-class definitions of the static pass ID members.
char AMDGPUPropagateAttributesEarly::ID = 0;
char AMDGPUPropagateAttributesLate::ID = 0;

// Register both passes with their command-line names and descriptions.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS - confirm against
// llvm/PassSupport.h.
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
                "amdgpu-propagate-attributes-early",
                "Early propagate attributes from kernels to functions",
                false, false)
INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
                "amdgpu-propagate-attributes-late",
                "Late propagate attributes from kernels to functions",
                false, false)
| 156 | |
| 157 | Function * |
| 158 | AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded, |
| 159 | Function *OrigF) { |
| 160 | // TODO: search for clone's clones. |
| 161 | for (Clone &C : Clones) |
| 162 | if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask) |
| 163 | return C.NewF; |
| 164 | |
| 165 | return nullptr; |
| 166 | } |
| 167 | |
| 168 | bool AMDGPUPropagateAttributes::process(Module &M) { |
| 169 | for (auto &F : M.functions()) |
| 170 | if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) |
| 171 | Roots.insert(&F); |
| 172 | |
| 173 | return process(); |
| 174 | } |
| 175 | |
| 176 | bool AMDGPUPropagateAttributes::process(Function &F) { |
| 177 | Roots.insert(&F); |
| 178 | return process(); |
| 179 | } |
| 180 | |
| 181 | bool AMDGPUPropagateAttributes::process() { |
| 182 | bool Changed = false; |
| 183 | SmallSet<Function *, 32> NewRoots; |
| 184 | SmallSet<Function *, 32> Replaced; |
| 185 | |
| 186 | if (Roots.empty()) |
| 187 | return false; |
| 188 | Module &M = *(*Roots.begin())->getParent(); |
| 189 | |
| 190 | do { |
| 191 | Roots.insert(NewRoots.begin(), NewRoots.end()); |
| 192 | NewRoots.clear(); |
| 193 | |
| 194 | for (auto &F : M.functions()) { |
| 195 | if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F)) |
| 196 | continue; |
| 197 | |
| 198 | const FeatureBitset &CalleeBits = |
| 199 | TM->getSubtargetImpl(F)->getFeatureBits(); |
| 200 | SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; |
| 201 | |
| 202 | for (User *U : F.users()) { |
| 203 | Instruction *I = dyn_cast<Instruction>(U); |
| 204 | if (!I) |
| 205 | continue; |
| 206 | CallBase *CI = dyn_cast<CallBase>(I); |
| 207 | if (!CI) |
| 208 | continue; |
| 209 | Function *Caller = CI->getCaller(); |
| 210 | if (!Caller) |
| 211 | continue; |
| 212 | if (!Roots.count(Caller)) |
| 213 | continue; |
| 214 | |
| 215 | const FeatureBitset &CallerBits = |
| 216 | TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures; |
| 217 | |
| 218 | if (CallerBits == (CalleeBits & TargetFeatures)) { |
| 219 | NewRoots.insert(&F); |
| 220 | continue; |
| 221 | } |
| 222 | |
| 223 | Function *NewF = findFunction(CallerBits, &F); |
| 224 | if (!NewF) { |
| 225 | FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) | |
| 226 | CallerBits); |
| 227 | if (!AllowClone) { |
| 228 | // This may set different features on different iteartions if |
| 229 | // there is a contradiction in callers' attributes. In this case |
| 230 | // we rely on a second pass running on Module, which is allowed |
| 231 | // to clone. |
| 232 | setFeatures(F, NewFeatures); |
| 233 | NewRoots.insert(&F); |
| 234 | Changed = true; |
| 235 | break; |
| 236 | } |
| 237 | |
| 238 | NewF = cloneWithFeatures(F, NewFeatures); |
| 239 | Clones.push_back(Clone(CallerBits, &F, NewF)); |
| 240 | NewRoots.insert(NewF); |
| 241 | } |
| 242 | |
| 243 | ToReplace.push_back(std::make_pair(CI, NewF)); |
| 244 | Replaced.insert(&F); |
| 245 | |
| 246 | Changed = true; |
| 247 | } |
| 248 | |
| 249 | while (!ToReplace.empty()) { |
| 250 | auto R = ToReplace.pop_back_val(); |
| 251 | R.first->setCalledFunction(R.second); |
| 252 | } |
| 253 | } |
| 254 | } while (!NewRoots.empty()); |
| 255 | |
| 256 | for (Function *F : Replaced) { |
| 257 | if (F->use_empty()) |
| 258 | F->eraseFromParent(); |
| 259 | } |
| 260 | |
| 261 | return Changed; |
| 262 | } |
| 263 | |
| 264 | Function * |
| 265 | AMDGPUPropagateAttributes::cloneWithFeatures(Function &F, |
| 266 | const FeatureBitset &NewFeatures) { |
| 267 | LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); |
| 268 | |
| 269 | ValueToValueMapTy dummy; |
| 270 | Function *NewF = CloneFunction(&F, dummy); |
| 271 | setFeatures(*NewF, NewFeatures); |
| 272 | |
| 273 | // Swap names. If that is the only clone it will retain the name of now |
| 274 | // dead value. |
| 275 | if (F.hasName()) { |
| 276 | std::string NewName = NewF->getName(); |
| 277 | NewF->takeName(&F); |
| 278 | F.setName(NewName); |
| 279 | |
| 280 | // Name has changed, it does not need an external symbol. |
| 281 | F.setVisibility(GlobalValue::DefaultVisibility); |
| 282 | F.setLinkage(GlobalValue::InternalLinkage); |
| 283 | } |
| 284 | |
| 285 | return NewF; |
| 286 | } |
| 287 | |
| 288 | void AMDGPUPropagateAttributes::setFeatures(Function &F, |
| 289 | const FeatureBitset &NewFeatures) { |
| 290 | std::string NewFeatureStr = getFeatureString(NewFeatures); |
| 291 | |
| 292 | LLVM_DEBUG(dbgs() << "Set features " |
| 293 | << getFeatureString(NewFeatures & TargetFeatures) |
| 294 | << " on " << F.getName() << '\n'); |
| 295 | |
| 296 | F.removeFnAttr("target-features"); |
| 297 | F.addFnAttr("target-features", NewFeatureStr); |
| 298 | } |
| 299 | |
| 300 | std::string |
| 301 | AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const |
| 302 | { |
| 303 | std::string Ret; |
| 304 | for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { |
| 305 | if (Features[KV.Value]) |
| 306 | Ret += (StringRef("+") + KV.Key + ",").str(); |
| 307 | else if (TargetFeatures[KV.Value]) |
| 308 | Ret += (StringRef("-") + KV.Key + ",").str(); |
| 309 | } |
| 310 | Ret.pop_back(); // Remove last comma. |
| 311 | return Ret; |
| 312 | } |
| 313 | |
| 314 | bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { |
| 315 | if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) |
| 316 | return false; |
| 317 | |
| 318 | return AMDGPUPropagateAttributes(TM, false).process(F); |
| 319 | } |
| 320 | |
| 321 | bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { |
| 322 | if (!TM) |
| 323 | return false; |
| 324 | |
| 325 | return AMDGPUPropagateAttributes(TM, true).process(M); |
| 326 | } |
| 327 | |
| 328 | FunctionPass |
| 329 | *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { |
| 330 | return new AMDGPUPropagateAttributesEarly(TM); |
| 331 | } |
| 332 | |
| 333 | ModulePass |
| 334 | *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { |
| 335 | return new AMDGPUPropagateAttributesLate(TM); |
| 336 | } |