| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 1 | //===- GlobalSplit.cpp - global variable splitter -------------------------===// | 
|  | 2 | // | 
| Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|  | 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 6 | // | 
|  | 7 | //===----------------------------------------------------------------------===// | 
|  | 8 | // | 
|  | 9 | // This pass uses inrange annotations on GEP indices to split globals where | 
|  | 10 | // beneficial. Clang currently attaches these annotations to references to | 
|  | 11 | // virtual table globals under the Itanium ABI for the benefit of the | 
|  | 12 | // whole-program virtual call optimization and control flow integrity passes. | 
|  | 13 | // | 
|  | 14 | //===----------------------------------------------------------------------===// | 
|  | 15 |  | 
| Davide Italiano | 2ae76dd | 2016-11-21 00:28:23 +0000 | [diff] [blame] | 16 | #include "llvm/Transforms/IPO/GlobalSplit.h" | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 17 | #include "llvm/ADT/SmallVector.h" | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 18 | #include "llvm/ADT/StringExtras.h" | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 19 | #include "llvm/IR/Constant.h" | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 20 | #include "llvm/IR/Constants.h" | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 21 | #include "llvm/IR/DataLayout.h" | 
|  | 22 | #include "llvm/IR/Function.h" | 
|  | 23 | #include "llvm/IR/GlobalValue.h" | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 24 | #include "llvm/IR/GlobalVariable.h" | 
|  | 25 | #include "llvm/IR/Intrinsics.h" | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 26 | #include "llvm/IR/LLVMContext.h" | 
|  | 27 | #include "llvm/IR/Metadata.h" | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 28 | #include "llvm/IR/Module.h" | 
|  | 29 | #include "llvm/IR/Operator.h" | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 30 | #include "llvm/IR/Type.h" | 
|  | 31 | #include "llvm/IR/User.h" | 
| Reid Kleckner | 05da2fe | 2019-11-13 13:15:01 -0800 | [diff] [blame] | 32 | #include "llvm/InitializePasses.h" | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 33 | #include "llvm/Pass.h" | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 34 | #include "llvm/Support/Casting.h" | 
| Chandler Carruth | 6bda14b | 2017-06-06 11:49:48 +0000 | [diff] [blame] | 35 | #include "llvm/Transforms/IPO.h" | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 36 | #include <cstdint> | 
|  | 37 | #include <vector> | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 38 |  | 
|  | 39 | using namespace llvm; | 
|  | 40 |  | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 41 | static bool splitGlobal(GlobalVariable &GV) { | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 42 | // If the address of the global is taken outside of the module, we cannot | 
|  | 43 | // apply this transformation. | 
|  | 44 | if (!GV.hasLocalLinkage()) | 
|  | 45 | return false; | 
|  | 46 |  | 
|  | 47 | // We currently only know how to split ConstantStructs. | 
|  | 48 | auto *Init = dyn_cast_or_null<ConstantStruct>(GV.getInitializer()); | 
|  | 49 | if (!Init) | 
|  | 50 | return false; | 
|  | 51 |  | 
|  | 52 | // Verify that each user of the global is an inrange getelementptr constant. | 
|  | 53 | // From this it follows that any loads from or stores to that global must use | 
|  | 54 | // a pointer derived from an inrange getelementptr constant, which is | 
|  | 55 | // sufficient to allow us to apply the splitting transform. | 
|  | 56 | for (User *U : GV.users()) { | 
|  | 57 | if (!isa<Constant>(U)) | 
|  | 58 | return false; | 
|  | 59 |  | 
|  | 60 | auto *GEP = dyn_cast<GEPOperator>(U); | 
|  | 61 | if (!GEP || !GEP->getInRangeIndex() || *GEP->getInRangeIndex() != 1 || | 
|  | 62 | !isa<ConstantInt>(GEP->getOperand(1)) || | 
|  | 63 | !cast<ConstantInt>(GEP->getOperand(1))->isZero() || | 
|  | 64 | !isa<ConstantInt>(GEP->getOperand(2))) | 
|  | 65 | return false; | 
|  | 66 | } | 
|  | 67 |  | 
|  | 68 | SmallVector<MDNode *, 2> Types; | 
|  | 69 | GV.getMetadata(LLVMContext::MD_type, Types); | 
|  | 70 |  | 
|  | 71 | const DataLayout &DL = GV.getParent()->getDataLayout(); | 
|  | 72 | const StructLayout *SL = DL.getStructLayout(Init->getType()); | 
|  | 73 |  | 
|  | 74 | IntegerType *Int32Ty = Type::getInt32Ty(GV.getContext()); | 
|  | 75 |  | 
|  | 76 | std::vector<GlobalVariable *> SplitGlobals(Init->getNumOperands()); | 
|  | 77 | for (unsigned I = 0; I != Init->getNumOperands(); ++I) { | 
|  | 78 | // Build a global representing this split piece. | 
|  | 79 | auto *SplitGV = | 
|  | 80 | new GlobalVariable(*GV.getParent(), Init->getOperand(I)->getType(), | 
|  | 81 | GV.isConstant(), GlobalValue::PrivateLinkage, | 
|  | 82 | Init->getOperand(I), GV.getName() + "." + utostr(I)); | 
|  | 83 | SplitGlobals[I] = SplitGV; | 
|  | 84 |  | 
|  | 85 | unsigned SplitBegin = SL->getElementOffset(I); | 
|  | 86 | unsigned SplitEnd = (I == Init->getNumOperands() - 1) | 
|  | 87 | ? SL->getSizeInBytes() | 
|  | 88 | : SL->getElementOffset(I + 1); | 
|  | 89 |  | 
|  | 90 | // Rebuild type metadata, adjusting by the split offset. | 
|  | 91 | // FIXME: See if we can use DW_OP_piece to preserve debug metadata here. | 
|  | 92 | for (MDNode *Type : Types) { | 
|  | 93 | uint64_t ByteOffset = cast<ConstantInt>( | 
|  | 94 | cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) | 
|  | 95 | ->getZExtValue(); | 
| Evgeniy Stepanov | 7a5cfa9 | 2017-03-07 22:18:48 +0000 | [diff] [blame] | 96 | // Type metadata may be attached one byte after the end of the vtable, for | 
|  | 97 | // classes without virtual methods in Itanium ABI. AFAIK, it is never | 
|  | 98 | // attached to the first byte of a vtable. Subtract one to get the right | 
|  | 99 | // slice. | 
|  | 100 | // This is making an assumption that vtable groups are the only kinds of | 
|  | 101 | // global variables that !type metadata can be attached to, and that they | 
|  | 102 | // are either Itanium ABI vtable groups or contain a single vtable (i.e. | 
|  | 103 | // Microsoft ABI vtables). | 
|  | 104 | uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1; | 
|  | 105 | if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd) | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 106 | continue; | 
|  | 107 | SplitGV->addMetadata( | 
|  | 108 | LLVMContext::MD_type, | 
|  | 109 | *MDNode::get(GV.getContext(), | 
|  | 110 | {ConstantAsMetadata::get( | 
|  | 111 | ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)), | 
|  | 112 | Type->getOperand(1)})); | 
|  | 113 | } | 
|  | 114 | } | 
|  | 115 |  | 
|  | 116 | for (User *U : GV.users()) { | 
|  | 117 | auto *GEP = cast<GEPOperator>(U); | 
|  | 118 | unsigned I = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); | 
|  | 119 | if (I >= SplitGlobals.size()) | 
|  | 120 | continue; | 
|  | 121 |  | 
|  | 122 | SmallVector<Value *, 4> Ops; | 
|  | 123 | Ops.push_back(ConstantInt::get(Int32Ty, 0)); | 
|  | 124 | for (unsigned I = 3; I != GEP->getNumOperands(); ++I) | 
|  | 125 | Ops.push_back(GEP->getOperand(I)); | 
|  | 126 |  | 
|  | 127 | auto *NewGEP = ConstantExpr::getGetElementPtr( | 
|  | 128 | SplitGlobals[I]->getInitializer()->getType(), SplitGlobals[I], Ops, | 
|  | 129 | GEP->isInBounds()); | 
|  | 130 | GEP->replaceAllUsesWith(NewGEP); | 
|  | 131 | } | 
|  | 132 |  | 
|  | 133 | // Finally, remove the original global. Any remaining uses refer to invalid | 
|  | 134 | // elements of the global, so replace with undef. | 
|  | 135 | if (!GV.use_empty()) | 
|  | 136 | GV.replaceAllUsesWith(UndefValue::get(GV.getType())); | 
|  | 137 | GV.eraseFromParent(); | 
|  | 138 | return true; | 
|  | 139 | } | 
|  | 140 |  | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 141 | static bool splitGlobals(Module &M) { | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 142 | // First, see if the module uses either of the llvm.type.test or | 
|  | 143 | // llvm.type.checked.load intrinsics, which indicates that splitting globals | 
|  | 144 | // may be beneficial. | 
|  | 145 | Function *TypeTestFunc = | 
|  | 146 | M.getFunction(Intrinsic::getName(Intrinsic::type_test)); | 
|  | 147 | Function *TypeCheckedLoadFunc = | 
|  | 148 | M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load)); | 
|  | 149 | if ((!TypeTestFunc || TypeTestFunc->use_empty()) && | 
|  | 150 | (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty())) | 
|  | 151 | return false; | 
|  | 152 |  | 
|  | 153 | bool Changed = false; | 
|  | 154 | for (auto I = M.global_begin(); I != M.global_end();) { | 
|  | 155 | GlobalVariable &GV = *I; | 
|  | 156 | ++I; | 
|  | 157 | Changed |= splitGlobal(GV); | 
|  | 158 | } | 
|  | 159 | return Changed; | 
|  | 160 | } | 
|  | 161 |  | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 162 | namespace { | 
|  | 163 |  | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 164 | struct GlobalSplit : public ModulePass { | 
|  | 165 | static char ID; | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 166 |  | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 167 | GlobalSplit() : ModulePass(ID) { | 
|  | 168 | initializeGlobalSplitPass(*PassRegistry::getPassRegistry()); | 
|  | 169 | } | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 170 |  | 
|  | 171 | bool runOnModule(Module &M) override { | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 172 | if (skipModule(M)) | 
|  | 173 | return false; | 
|  | 174 |  | 
|  | 175 | return splitGlobals(M); | 
|  | 176 | } | 
|  | 177 | }; | 
|  | 178 |  | 
| Eugene Zelenko | e9ea08a | 2017-10-10 22:49:55 +0000 | [diff] [blame] | 179 | } // end anonymous namespace | 
|  | 180 |  | 
|  | 181 | char GlobalSplit::ID = 0; | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 182 |  | 
|  | 183 | INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false) | 
| Peter Collingbourne | f72a8d4 | 2016-11-16 23:40:26 +0000 | [diff] [blame] | 184 |  | 
|  | 185 | ModulePass *llvm::createGlobalSplitPass() { | 
|  | 186 | return new GlobalSplit; | 
|  | 187 | } | 
| Davide Italiano | 2ae76dd | 2016-11-21 00:28:23 +0000 | [diff] [blame] | 188 |  | 
|  | 189 | PreservedAnalyses GlobalSplitPass::run(Module &M, ModuleAnalysisManager &AM) { | 
|  | 190 | if (!splitGlobals(M)) | 
|  | 191 | return PreservedAnalyses::all(); | 
|  | 192 | return PreservedAnalyses::none(); | 
|  | 193 | } |