Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 1 | //===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // This pass merges globals with internal linkage into one. This way all the |
| 10 | // globals which were merged into the bigger one can be addressed using offsets |
| 11 | // from the same base pointer (no need for a separate base pointer for each |
| 12 | // global). Such a transformation can significantly reduce the register pressure |
| 13 | // when many globals are involved. |
| 14 | // |
| 15 | // For example, consider the code which touches several global variables at once: |
| 16 | // |
| 17 | // static int foo[N], bar[N], baz[N]; |
| 18 | // |
| 19 | // for (i = 0; i < N; ++i) { |
| 20 | // foo[i] = bar[i] * baz[i]; |
| 21 | // } |
| 22 | // |
| 23 | // On ARM the addresses of the 3 arrays must be kept in registers, thus |
| 24 | // this code has quite large register pressure (loop body): |
| 25 | // |
| 26 | // ldr r1, [r5], #4 |
| 27 | // ldr r2, [r6], #4 |
| 28 | // mul r1, r2, r1 |
| 29 | // str r1, [r0], #4 |
| 30 | // |
| 31 | // Pass converts the code to something like: |
| 32 | // |
| 33 | // static struct { |
| 34 | // int foo[N]; |
| 35 | // int bar[N]; |
| 36 | // int baz[N]; |
| 37 | // } merged; |
| 38 | // |
| 39 | // for (i = 0; i < N; ++i) { |
| 40 | // merged.foo[i] = merged.bar[i] * merged.baz[i]; |
| 41 | // } |
| 42 | // |
| 43 | // and in ARM code this becomes: |
| 44 | // |
| 45 | // ldr r0, [r5, #40] |
| 46 | // ldr r1, [r5, #80] |
| 47 | // mul r0, r1, r0 |
| 48 | // str r0, [r5], #4 |
| 49 | // |
| 50 | // note that we saved 2 registers here almost "for free". |
| 51 | // ===----------------------------------------------------------------------===// |
| 52 | |
| 53 | #define DEBUG_TYPE "arm-global-merge" |
| 54 | #include "ARM.h" |
| 55 | #include "llvm/CodeGen/Passes.h" |
| 56 | #include "llvm/Attributes.h" |
| 57 | #include "llvm/Constants.h" |
| 58 | #include "llvm/DerivedTypes.h" |
| 59 | #include "llvm/Function.h" |
| 60 | #include "llvm/GlobalVariable.h" |
| 61 | #include "llvm/Instructions.h" |
| 62 | #include "llvm/Intrinsics.h" |
| 63 | #include "llvm/Module.h" |
| 64 | #include "llvm/Pass.h" |
| 65 | #include "llvm/Target/TargetData.h" |
| 66 | #include "llvm/Target/TargetLowering.h" |
| 67 | using namespace llvm; |
| 68 | |
| 69 | namespace { |
| 70 | class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass { |
| 71 | /// TLI - Keep a pointer of a TargetLowering to consult for determining |
| 72 | /// target type sizes. |
| 73 | const TargetLowering *TLI; |
| 74 | |
| 75 | bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, |
| 76 | Module &M, bool) const; |
| 77 | |
| 78 | public: |
| 79 | static char ID; // Pass identification, replacement for typeid. |
| 80 | explicit ARMGlobalMerge(const TargetLowering *tli) |
| 81 | : FunctionPass(&ID), TLI(tli) {} |
| 82 | |
| 83 | virtual bool doInitialization(Module &M); |
| 84 | virtual bool runOnFunction(Function& F); |
| 85 | |
| 86 | const char *getPassName() const { |
| 87 | return "Merge internal globals"; |
| 88 | } |
| 89 | |
| 90 | virtual void getAnalysisUsage(AnalysisUsage &AU) const { |
| 91 | AU.setPreservesCFG(); |
| 92 | FunctionPass::getAnalysisUsage(AU); |
| 93 | } |
| 94 | |
| 95 | struct GlobalCmp { |
| 96 | const TargetData *TD; |
| 97 | |
| 98 | GlobalCmp(const TargetData *td): |
Douglas Gregor | 037b5e4 | 2010-07-25 17:34:42 +0000 | [diff] [blame^] | 99 | TD(td) { } |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 100 | |
| 101 | bool operator() (const GlobalVariable* GV1, |
| 102 | const GlobalVariable* GV2) { |
| 103 | const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType(); |
| 104 | const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType(); |
| 105 | |
| 106 | return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2)); |
| 107 | } |
| 108 | }; |
| 109 | }; |
| 110 | } // end anonymous namespace |
| 111 | |
| 112 | char ARMGlobalMerge::ID = 0; |
| 113 | |
| 114 | bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, |
| 115 | Module &M, bool isConst) const { |
| 116 | const TargetData *TD = TLI->getTargetData(); |
| 117 | |
| 118 | // FIXME: Infer the maximum possible offset depending on the actual users |
| 119 | // (these max offsets are different for the users inside Thumb or ARM |
| 120 | // functions) |
| 121 | unsigned MaxOffset = TLI->getMaximalGlobalOffset(); |
| 122 | |
| 123 | // FIXME: Find better heuristics |
| 124 | std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD)); |
| 125 | |
| 126 | const Type *Int32Ty = Type::getInt32Ty(M.getContext()); |
| 127 | |
| 128 | for (size_t i = 0, e = Globals.size(); i != e; ) { |
| 129 | size_t j = 0; |
| 130 | uint64_t MergedSize = 0; |
| 131 | std::vector<const Type*> Tys; |
| 132 | std::vector<Constant*> Inits; |
| 133 | for (j = i; MergedSize < MaxOffset && j != e; ++j) { |
| 134 | const Type* Ty = Globals[j]->getType()->getElementType(); |
| 135 | Tys.push_back(Ty); |
| 136 | Inits.push_back(Globals[j]->getInitializer()); |
| 137 | MergedSize += TD->getTypeAllocSize(Ty); |
| 138 | } |
| 139 | |
| 140 | StructType* MergedTy = StructType::get(M.getContext(), Tys); |
| 141 | Constant* MergedInit = ConstantStruct::get(MergedTy, Inits); |
| 142 | GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst, |
| 143 | GlobalValue::InternalLinkage, |
| 144 | MergedInit, "merged"); |
| 145 | for (size_t k = i; k < j; ++k) { |
| 146 | SmallVector<Constant*, 2> Idx; |
| 147 | Idx.push_back(ConstantInt::get(Int32Ty, 0)); |
| 148 | Idx.push_back(ConstantInt::get(Int32Ty, k-i)); |
| 149 | |
| 150 | Constant* GEP = |
| 151 | ConstantExpr::getInBoundsGetElementPtr(MergedGV, |
| 152 | &Idx[0], Idx.size()); |
| 153 | |
| 154 | Globals[k]->replaceAllUsesWith(GEP); |
| 155 | Globals[k]->eraseFromParent(); |
| 156 | } |
| 157 | i = j; |
| 158 | } |
| 159 | |
| 160 | return true; |
| 161 | } |
| 162 | |
| 163 | |
| 164 | bool ARMGlobalMerge::doInitialization(Module& M) { |
| 165 | SmallVector<GlobalVariable*, 16> Globals, ConstGlobals; |
| 166 | const TargetData *TD = TLI->getTargetData(); |
| 167 | unsigned MaxOffset = TLI->getMaximalGlobalOffset(); |
| 168 | bool Changed = false; |
| 169 | |
| 170 | // Grab all non-const globals. |
| 171 | for (Module::global_iterator I = M.global_begin(), |
| 172 | E = M.global_end(); I != E; ++I) { |
| 173 | // Merge is safe for "normal" internal globals only |
| 174 | if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection()) |
| 175 | continue; |
| 176 | |
| 177 | // Ignore fancy-aligned globals for now. |
| 178 | if (I->getAlignment() != 0) |
| 179 | continue; |
| 180 | |
| 181 | if (TD->getTypeAllocSize(I->getType()) < MaxOffset) { |
| 182 | if (I->isConstant()) |
| 183 | ConstGlobals.push_back(I); |
| 184 | else |
| 185 | Globals.push_back(I); |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | if (Globals.size() > 1) |
| 190 | Changed |= doMerge(Globals, M, false); |
| 191 | if (ConstGlobals.size() > 1) |
| 192 | Changed |= doMerge(ConstGlobals, M, true); |
| 193 | |
| 194 | return Changed; |
| 195 | } |
| 196 | |
| 197 | bool ARMGlobalMerge::runOnFunction(Function& F) { |
| 198 | return false; |
| 199 | } |
| 200 | |
| 201 | FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) { |
| 202 | return new ARMGlobalMerge(tli); |
| 203 | } |