Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 1 | //===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // This pass merges globals with internal linkage into one. This way all the |
| 10 | // globals which were merged into a bigger one can be addressed using offsets |
| 11 | // from the same base pointer (no need for a separate base pointer for each |
| 12 | // global). Such a transformation can significantly reduce the register pressure |
| 13 | // when many globals are involved. |
| 14 | // |
Eric Christopher | a99c3e9 | 2010-09-28 04:18:29 +0000 | [diff] [blame] | 15 | // For example, consider the code which touches several global variables at |
| 16 | // once: |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 17 | // |
| 18 | // static int foo[N], bar[N], baz[N]; |
| 19 | // |
| 20 | // for (i = 0; i < N; ++i) { |
| 21 | // foo[i] = bar[i] * baz[i]; |
| 22 | // } |
| 23 | // |
| 24 | // On ARM the addresses of 3 arrays should be kept in the registers, thus |
| 25 | // this code has quite large register pressure (loop body): |
| 26 | // |
| 27 | // ldr r1, [r5], #4 |
| 28 | // ldr r2, [r6], #4 |
| 29 | // mul r1, r2, r1 |
| 30 | // str r1, [r0], #4 |
| 31 | // |
| 32 | // Pass converts the code to something like: |
| 33 | // |
| 34 | // static struct { |
| 35 | // int foo[N]; |
| 36 | // int bar[N]; |
| 37 | // int baz[N]; |
| 38 | // } merged; |
| 39 | // |
| 40 | // for (i = 0; i < N; ++i) { |
| 41 | // merged.foo[i] = merged.bar[i] * merged.baz[i]; |
| 42 | // } |
| 43 | // |
| 44 | // and in ARM code this becomes: |
| 45 | // |
| 46 | // ldr r0, [r5, #40] |
| 47 | // ldr r1, [r5, #80] |
| 48 | // mul r0, r1, r0 |
| 49 | // str r0, [r5], #4 |
| 50 | // |
| 51 | // note that we saved 2 registers here almost "for free". |
Eric Christopher | a99c3e9 | 2010-09-28 04:18:29 +0000 | [diff] [blame] | 52 | // ===---------------------------------------------------------------------===// |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 53 | |
| 54 | #define DEBUG_TYPE "arm-global-merge" |
| 55 | #include "ARM.h" |
| 56 | #include "llvm/CodeGen/Passes.h" |
| 57 | #include "llvm/Attributes.h" |
| 58 | #include "llvm/Constants.h" |
| 59 | #include "llvm/DerivedTypes.h" |
| 60 | #include "llvm/Function.h" |
| 61 | #include "llvm/GlobalVariable.h" |
| 62 | #include "llvm/Instructions.h" |
| 63 | #include "llvm/Intrinsics.h" |
| 64 | #include "llvm/Module.h" |
| 65 | #include "llvm/Pass.h" |
| 66 | #include "llvm/Target/TargetData.h" |
| 67 | #include "llvm/Target/TargetLowering.h" |
Bob Wilson | 0564609 | 2010-11-17 21:25:39 +0000 | [diff] [blame] | 68 | #include "llvm/Target/TargetLoweringObjectFile.h" |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 69 | using namespace llvm; |
| 70 | |
| 71 | namespace { |
Chris Lattner | 252b491 | 2010-09-05 21:18:45 +0000 | [diff] [blame] | 72 | class ARMGlobalMerge : public FunctionPass { |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 73 | /// TLI - Keep a pointer of a TargetLowering to consult for determining |
| 74 | /// target type sizes. |
| 75 | const TargetLowering *TLI; |
| 76 | |
| 77 | bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, |
Bob Wilson | 0564609 | 2010-11-17 21:25:39 +0000 | [diff] [blame] | 78 | Module &M, bool isConst) const; |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 79 | |
| 80 | public: |
| 81 | static char ID; // Pass identification, replacement for typeid. |
| 82 | explicit ARMGlobalMerge(const TargetLowering *tli) |
Owen Anderson | 90c579d | 2010-08-06 18:33:48 +0000 | [diff] [blame] | 83 | : FunctionPass(ID), TLI(tli) {} |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 84 | |
| 85 | virtual bool doInitialization(Module &M); |
Chris Lattner | 252b491 | 2010-09-05 21:18:45 +0000 | [diff] [blame] | 86 | virtual bool runOnFunction(Function &F); |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 87 | |
| 88 | const char *getPassName() const { |
| 89 | return "Merge internal globals"; |
| 90 | } |
| 91 | |
| 92 | virtual void getAnalysisUsage(AnalysisUsage &AU) const { |
| 93 | AU.setPreservesCFG(); |
| 94 | FunctionPass::getAnalysisUsage(AU); |
| 95 | } |
| 96 | |
| 97 | struct GlobalCmp { |
| 98 | const TargetData *TD; |
| 99 | |
Chris Lattner | 252b491 | 2010-09-05 21:18:45 +0000 | [diff] [blame] | 100 | GlobalCmp(const TargetData *td) : TD(td) { } |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 101 | |
Chris Lattner | 252b491 | 2010-09-05 21:18:45 +0000 | [diff] [blame] | 102 | bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) { |
Chris Lattner | db125cf | 2011-07-18 04:54:35 +0000 | [diff] [blame^] | 103 | Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); |
| 104 | Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 105 | |
| 106 | return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2)); |
| 107 | } |
| 108 | }; |
| 109 | }; |
| 110 | } // end anonymous namespace |
| 111 | |
| 112 | char ARMGlobalMerge::ID = 0; |
| 113 | |
| 114 | bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, |
| 115 | Module &M, bool isConst) const { |
| 116 | const TargetData *TD = TLI->getTargetData(); |
| 117 | |
| 118 | // FIXME: Infer the maximum possible offset depending on the actual users |
| 119 | // (these max offsets are different for the users inside Thumb or ARM |
| 120 | // functions) |
| 121 | unsigned MaxOffset = TLI->getMaximalGlobalOffset(); |
| 122 | |
| 123 | // FIXME: Find better heuristics |
| 124 | std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD)); |
| 125 | |
Chris Lattner | db125cf | 2011-07-18 04:54:35 +0000 | [diff] [blame^] | 126 | Type *Int32Ty = Type::getInt32Ty(M.getContext()); |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 127 | |
| 128 | for (size_t i = 0, e = Globals.size(); i != e; ) { |
| 129 | size_t j = 0; |
| 130 | uint64_t MergedSize = 0; |
Jay Foad | 5fdd6c8 | 2011-07-12 14:06:48 +0000 | [diff] [blame] | 131 | std::vector<Type*> Tys; |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 132 | std::vector<Constant*> Inits; |
Bob Wilson | 619a372 | 2010-11-17 21:25:36 +0000 | [diff] [blame] | 133 | for (j = i; j != e; ++j) { |
Jay Foad | 5fdd6c8 | 2011-07-12 14:06:48 +0000 | [diff] [blame] | 134 | Type *Ty = Globals[j]->getType()->getElementType(); |
Bob Wilson | 619a372 | 2010-11-17 21:25:36 +0000 | [diff] [blame] | 135 | MergedSize += TD->getTypeAllocSize(Ty); |
| 136 | if (MergedSize > MaxOffset) { |
| 137 | break; |
| 138 | } |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 139 | Tys.push_back(Ty); |
| 140 | Inits.push_back(Globals[j]->getInitializer()); |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 141 | } |
| 142 | |
Chris Lattner | 252b491 | 2010-09-05 21:18:45 +0000 | [diff] [blame] | 143 | StructType *MergedTy = StructType::get(M.getContext(), Tys); |
| 144 | Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); |
| 145 | GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst, |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 146 | GlobalValue::InternalLinkage, |
Bob Wilson | 72831dc | 2010-11-17 21:25:33 +0000 | [diff] [blame] | 147 | MergedInit, "_MergedGlobals"); |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 148 | for (size_t k = i; k < j; ++k) { |
Chris Lattner | 252b491 | 2010-09-05 21:18:45 +0000 | [diff] [blame] | 149 | Constant *Idx[2] = { |
| 150 | ConstantInt::get(Int32Ty, 0), |
| 151 | ConstantInt::get(Int32Ty, k-i) |
| 152 | }; |
| 153 | Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2); |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 154 | Globals[k]->replaceAllUsesWith(GEP); |
| 155 | Globals[k]->eraseFromParent(); |
| 156 | } |
| 157 | i = j; |
| 158 | } |
| 159 | |
| 160 | return true; |
| 161 | } |
| 162 | |
| 163 | |
Chris Lattner | 252b491 | 2010-09-05 21:18:45 +0000 | [diff] [blame] | 164 | bool ARMGlobalMerge::doInitialization(Module &M) { |
Bob Wilson | 0564609 | 2010-11-17 21:25:39 +0000 | [diff] [blame] | 165 | SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals; |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 166 | const TargetData *TD = TLI->getTargetData(); |
| 167 | unsigned MaxOffset = TLI->getMaximalGlobalOffset(); |
| 168 | bool Changed = false; |
| 169 | |
| 170 | // Grab all non-const globals. |
| 171 | for (Module::global_iterator I = M.global_begin(), |
| 172 | E = M.global_end(); I != E; ++I) { |
| 173 | // Merge is safe for "normal" internal globals only |
| 174 | if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection()) |
| 175 | continue; |
| 176 | |
| 177 | // Ignore fancy-aligned globals for now. |
Cameron Zwarich | faff127 | 2011-06-29 22:24:25 +0000 | [diff] [blame] | 178 | unsigned Alignment = I->getAlignment(); |
Chris Lattner | db125cf | 2011-07-18 04:54:35 +0000 | [diff] [blame^] | 179 | Type *Ty = I->getType()->getElementType(); |
Cameron Zwarich | f75ae4c | 2011-07-11 01:29:42 +0000 | [diff] [blame] | 180 | if (Alignment > TD->getABITypeAlignment(Ty)) |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 181 | continue; |
| 182 | |
Anton Korobeynikov | b5a0ef9 | 2010-07-26 18:45:39 +0000 | [diff] [blame] | 183 | // Ignore all 'special' globals. |
| 184 | if (I->getName().startswith("llvm.") || |
| 185 | I->getName().startswith(".llvm.")) |
| 186 | continue; |
| 187 | |
Cameron Zwarich | f75ae4c | 2011-07-11 01:29:42 +0000 | [diff] [blame] | 188 | if (TD->getTypeAllocSize(Ty) < MaxOffset) { |
Bob Wilson | 0564609 | 2010-11-17 21:25:39 +0000 | [diff] [blame] | 189 | const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering(); |
| 190 | if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal()) |
| 191 | BSSGlobals.push_back(I); |
| 192 | else if (I->isConstant()) |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 193 | ConstGlobals.push_back(I); |
| 194 | else |
| 195 | Globals.push_back(I); |
| 196 | } |
| 197 | } |
| 198 | |
| 199 | if (Globals.size() > 1) |
| 200 | Changed |= doMerge(Globals, M, false); |
Bob Wilson | 0564609 | 2010-11-17 21:25:39 +0000 | [diff] [blame] | 201 | if (BSSGlobals.size() > 1) |
| 202 | Changed |= doMerge(BSSGlobals, M, false); |
| 203 | |
Anton Korobeynikov | b5a0ef9 | 2010-07-26 18:45:39 +0000 | [diff] [blame] | 204 | // FIXME: This currently breaks the EH processing due to way how the |
| 205 | // typeinfo detection works. We might want to detect the TIs and ignore |
| 206 | // them in the future. |
Anton Korobeynikov | b5a0ef9 | 2010-07-26 18:45:39 +0000 | [diff] [blame] | 207 | // if (ConstGlobals.size() > 1) |
| 208 | // Changed |= doMerge(ConstGlobals, M, true); |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 209 | |
| 210 | return Changed; |
| 211 | } |
| 212 | |
Chris Lattner | 252b491 | 2010-09-05 21:18:45 +0000 | [diff] [blame] | 213 | bool ARMGlobalMerge::runOnFunction(Function &F) { |
Anton Korobeynikov | cec36f4 | 2010-07-24 21:52:08 +0000 | [diff] [blame] | 214 | return false; |
| 215 | } |
| 216 | |
| 217 | FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) { |
| 218 | return new ARMGlobalMerge(tli); |
| 219 | } |