| Stephen Hines | 36b5688 | 2014-04-23 16:57:46 -0700 | [diff] [blame] | 1 | //===-------------- ARM64CollectLOH.cpp - ARM64 collect LOH pass --*- C++ -*-=// | 
 | 2 | // | 
 | 3 | //                     The LLVM Compiler Infrastructure | 
 | 4 | // | 
 | 5 | // This file is distributed under the University of Illinois Open Source | 
 | 6 | // License. See LICENSE.TXT for details. | 
 | 7 | // | 
 | 8 | //===----------------------------------------------------------------------===// | 
 | 9 | // | 
 | 10 | // This file contains a pass that collect the Linker Optimization Hint (LOH). | 
 | 11 | // This pass should be run at the very end of the compilation flow, just before | 
 | 12 | // assembly printer. | 
 | 13 | // To be useful for the linker, the LOH must be printed into the assembly file. | 
 | 14 | // | 
 | 15 | // A LOH describes a sequence of instructions that may be optimized by the | 
 | 16 | // linker. | 
 | 17 | // This same sequence cannot be optimized by the compiler because some of | 
 | 18 | // the information will be known at link time. | 
 | 19 | // For instance, consider the following sequence: | 
 | 20 | //     L1: adrp xA, sym@PAGE | 
 | 21 | //     L2: add xB, xA, sym@PAGEOFF | 
 | 22 | //     L3: ldr xC, [xB, #imm] | 
 | 23 | // This sequence can be turned into: | 
 | 24 | // A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB: | 
 | 25 | //     L3: ldr xC, sym+#imm | 
 | 26 | // It may also be turned into either the following more efficient | 
 | 27 | // code sequences: | 
 | 28 | // - If sym@PAGEOFF + #imm fits the encoding space of L3. | 
 | 29 | //     L1: adrp xA, sym@PAGE | 
 | 30 | //     L3: ldr xC, [xB, sym@PAGEOFF + #imm] | 
 | 31 | // - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB: | 
 | 32 | //     L1: adr xA, sym | 
 | 33 | //     L3: ldr xC, [xB, #imm] | 
 | 34 | // | 
 | 35 | // To be valid a LOH must meet all the requirements needed by all the related | 
 | 36 | // possible linker transformations. | 
 | 37 | // For instance, using the running example, the constraints to emit | 
 | 38 | // ".loh AdrpAddLdr" are: | 
 | 39 | // - L1, L2, and L3 instructions are of the expected type, i.e., | 
 | 40 | //   respectively ADRP, ADD (immediate), and LD. | 
 | 41 | // - The result of L1 is used only by L2. | 
 | 42 | // - The register argument (xA) used in the ADD instruction is defined | 
 | 43 | //   only by L1. | 
 | 44 | // - The result of L2 is used only by L3. | 
 | 45 | // - The base address (xB) in L3 is defined only L2. | 
 | 46 | // - The ADRP in L1 and the ADD in L2 must reference the same symbol using | 
 | 47 | //   @PAGE/@PAGEOFF with no additional constants | 
 | 48 | // | 
 | 49 | // Currently supported LOHs are: | 
 | 50 | // * So called non-ADRP-related: | 
 | 51 | //   - .loh AdrpAddLdr L1, L2, L3: | 
 | 52 | //     L1: adrp xA, sym@PAGE | 
 | 53 | //     L2: add xB, xA, sym@PAGEOFF | 
 | 54 | //     L3: ldr xC, [xB, #imm] | 
 | 55 | //   - .loh AdrpLdrGotLdr L1, L2, L3: | 
 | 56 | //     L1: adrp xA, sym@GOTPAGE | 
 | 57 | //     L2: ldr xB, [xA, sym@GOTPAGEOFF] | 
 | 58 | //     L3: ldr xC, [xB, #imm] | 
 | 59 | //   - .loh AdrpLdr L1, L3: | 
 | 60 | //     L1: adrp xA, sym@PAGE | 
 | 61 | //     L3: ldr xC, [xA, sym@PAGEOFF] | 
 | 62 | //   - .loh AdrpAddStr L1, L2, L3: | 
 | 63 | //     L1: adrp xA, sym@PAGE | 
 | 64 | //     L2: add xB, xA, sym@PAGEOFF | 
 | 65 | //     L3: str xC, [xB, #imm] | 
 | 66 | //   - .loh AdrpLdrGotStr L1, L2, L3: | 
 | 67 | //     L1: adrp xA, sym@GOTPAGE | 
 | 68 | //     L2: ldr xB, [xA, sym@GOTPAGEOFF] | 
 | 69 | //     L3: str xC, [xB, #imm] | 
 | 70 | //   - .loh AdrpAdd L1, L2: | 
 | 71 | //     L1: adrp xA, sym@PAGE | 
 | 72 | //     L2: add xB, xA, sym@PAGEOFF | 
 | 73 | //   For all these LOHs, L1, L2, L3 form a simple chain: | 
 | 74 | //   L1 result is used only by L2 and L2 result by L3. | 
 | 75 | //   L3 LOH-related argument is defined only by L2 and L2 LOH-related argument | 
 | 76 | //   by L1. | 
 | 77 | // All these LOHs aim at using more efficient load/store patterns by folding | 
 | 78 | // some instructions used to compute the address directly into the load/store. | 
 | 79 | // | 
 | 80 | // * So called ADRP-related: | 
 | 81 | //  - .loh AdrpAdrp L2, L1: | 
 | 82 | //    L2: ADRP xA, sym1@PAGE | 
 | 83 | //    L1: ADRP xA, sym2@PAGE | 
 | 84 | //    L2 dominates L1 and xA is not redifined between L2 and L1 | 
 | 85 | // This LOH aims at getting rid of redundant ADRP instructions. | 
 | 86 | // | 
 | 87 | // The overall design for emitting the LOHs is: | 
 | 88 | // 1. ARM64CollectLOH (this pass) records the LOHs in the ARM64FunctionInfo. | 
 | 89 | // 2. ARM64AsmPrinter reads the LOHs from ARM64FunctionInfo and it: | 
 | 90 | //     1. Associates them a label. | 
 | 91 | //     2. Emits them in a MCStreamer (EmitLOHDirective). | 
 | 92 | //         - The MCMachOStreamer records them into the MCAssembler. | 
 | 93 | //         - The MCAsmStreamer prints them. | 
 | 94 | //         - Other MCStreamers ignore them. | 
 | 95 | //     3. Closes the MCStreamer: | 
 | 96 | //         - The MachObjectWriter gets them from the MCAssembler and writes | 
 | 97 | //           them in the object file. | 
 | 98 | //         - Other ObjectWriters ignore them. | 
 | 99 | //===----------------------------------------------------------------------===// | 
 | 100 |  | 
 | 101 | #define DEBUG_TYPE "arm64-collect-loh" | 
 | 102 | #include "ARM64.h" | 
 | 103 | #include "ARM64InstrInfo.h" | 
 | 104 | #include "ARM64MachineFunctionInfo.h" | 
 | 105 | #include "MCTargetDesc/ARM64AddressingModes.h" | 
 | 106 | #include "llvm/ADT/BitVector.h" | 
 | 107 | #include "llvm/ADT/DenseMap.h" | 
 | 108 | #include "llvm/ADT/MapVector.h" | 
 | 109 | #include "llvm/ADT/SetVector.h" | 
 | 110 | #include "llvm/ADT/SmallVector.h" | 
 | 111 | #include "llvm/CodeGen/MachineBasicBlock.h" | 
 | 112 | #include "llvm/CodeGen/MachineDominators.h" | 
 | 113 | #include "llvm/CodeGen/MachineFunctionPass.h" | 
 | 114 | #include "llvm/CodeGen/MachineInstr.h" | 
 | 115 | #include "llvm/CodeGen/MachineInstrBuilder.h" | 
 | 116 | #include "llvm/Target/TargetInstrInfo.h" | 
 | 117 | #include "llvm/Target/TargetMachine.h" | 
 | 118 | #include "llvm/Target/TargetRegisterInfo.h" | 
 | 119 | #include "llvm/Support/CommandLine.h" | 
 | 120 | #include "llvm/Support/Debug.h" | 
 | 121 | #include "llvm/Support/ErrorHandling.h" | 
 | 122 | #include "llvm/Support/raw_ostream.h" | 
 | 123 | #include "llvm/ADT/Statistic.h" | 
 | 124 | using namespace llvm; | 
 | 125 |  | 
 | 126 | static cl::opt<bool> | 
 | 127 | PreCollectRegister("arm64-collect-loh-pre-collect-register", cl::Hidden, | 
 | 128 |                    cl::desc("Restrict analysis to registers invovled" | 
 | 129 |                             " in LOHs"), | 
 | 130 |                    cl::init(true)); | 
 | 131 |  | 
 | 132 | static cl::opt<bool> | 
 | 133 | BasicBlockScopeOnly("arm64-collect-loh-bb-only", cl::Hidden, | 
 | 134 |                     cl::desc("Restrict analysis at basic block scope"), | 
 | 135 |                     cl::init(true)); | 
 | 136 |  | 
 | 137 | STATISTIC(NumADRPSimpleCandidate, | 
 | 138 |           "Number of simplifiable ADRP dominate by another"); | 
 | 139 | STATISTIC(NumADRPComplexCandidate2, | 
 | 140 |           "Number of simplifiable ADRP reachable by 2 defs"); | 
 | 141 | STATISTIC(NumADRPComplexCandidate3, | 
 | 142 |           "Number of simplifiable ADRP reachable by 3 defs"); | 
 | 143 | STATISTIC(NumADRPComplexCandidateOther, | 
 | 144 |           "Number of simplifiable ADRP reachable by 4 or more defs"); | 
 | 145 | STATISTIC(NumADDToSTRWithImm, | 
 | 146 |           "Number of simplifiable STR with imm reachable by ADD"); | 
 | 147 | STATISTIC(NumLDRToSTRWithImm, | 
 | 148 |           "Number of simplifiable STR with imm reachable by LDR"); | 
 | 149 | STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD"); | 
 | 150 | STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR"); | 
 | 151 | STATISTIC(NumADDToLDRWithImm, | 
 | 152 |           "Number of simplifiable LDR with imm reachable by ADD"); | 
 | 153 | STATISTIC(NumLDRToLDRWithImm, | 
 | 154 |           "Number of simplifiable LDR with imm reachable by LDR"); | 
 | 155 | STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD"); | 
 | 156 | STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR"); | 
 | 157 | STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP"); | 
 | 158 | STATISTIC(NumCplxLvl1, "Number of complex case of level 1"); | 
 | 159 | STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1"); | 
 | 160 | STATISTIC(NumCplxLvl2, "Number of complex case of level 2"); | 
 | 161 | STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2"); | 
 | 162 | STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD"); | 
 | 163 | STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD"); | 
 | 164 |  | 
 | 165 | namespace llvm { | 
 | 166 | void initializeARM64CollectLOHPass(PassRegistry &); | 
 | 167 | } | 
 | 168 |  | 
 | 169 | namespace { | 
 | 170 | struct ARM64CollectLOH : public MachineFunctionPass { | 
 | 171 |   static char ID; | 
 | 172 |   ARM64CollectLOH() : MachineFunctionPass(ID) { | 
 | 173 |     initializeARM64CollectLOHPass(*PassRegistry::getPassRegistry()); | 
 | 174 |   } | 
 | 175 |  | 
 | 176 |   virtual bool runOnMachineFunction(MachineFunction &Fn); | 
 | 177 |  | 
 | 178 |   virtual const char *getPassName() const { | 
 | 179 |     return "ARM64 Collect Linker Optimization Hint (LOH)"; | 
 | 180 |   } | 
 | 181 |  | 
 | 182 |   void getAnalysisUsage(AnalysisUsage &AU) const { | 
 | 183 |     AU.setPreservesAll(); | 
 | 184 |     MachineFunctionPass::getAnalysisUsage(AU); | 
 | 185 |     AU.addRequired<MachineDominatorTree>(); | 
 | 186 |   } | 
 | 187 |  | 
 | 188 | private: | 
 | 189 | }; | 
 | 190 |  | 
 | 191 | /// A set of MachineInstruction. | 
 | 192 | typedef SetVector<const MachineInstr *> SetOfMachineInstr; | 
 | 193 | /// Map a basic block to a set of instructions per register. | 
 | 194 | /// This is used to represent the exposed uses of a basic block | 
 | 195 | /// per register. | 
 | 196 | typedef MapVector<const MachineBasicBlock *, SetOfMachineInstr *> | 
 | 197 | BlockToSetOfInstrsPerColor; | 
 | 198 | /// Map a basic block to an instruction per register. | 
 | 199 | /// This is used to represent the live-out definitions of a basic block | 
 | 200 | /// per register. | 
 | 201 | typedef MapVector<const MachineBasicBlock *, const MachineInstr **> | 
 | 202 | BlockToInstrPerColor; | 
 | 203 | /// Map an instruction to a set of instructions. Used to represent the | 
 | 204 | /// mapping def to reachable uses or use to definitions. | 
 | 205 | typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs; | 
 | 206 | /// Map a basic block to a BitVector. | 
 | 207 | /// This is used to record the kill registers per basic block. | 
 | 208 | typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet; | 
 | 209 |  | 
 | 210 | /// Map a register to a dense id. | 
 | 211 | typedef DenseMap<unsigned, unsigned> MapRegToId; | 
 | 212 | /// Map a dense id to a register. Used for debug purposes. | 
 | 213 | typedef SmallVector<unsigned, 32> MapIdToReg; | 
 | 214 | } // end anonymous namespace. | 
 | 215 |  | 
 | 216 | char ARM64CollectLOH::ID = 0; | 
 | 217 |  | 
 | 218 | INITIALIZE_PASS_BEGIN(ARM64CollectLOH, "arm64-collect-loh", | 
 | 219 |                       "ARM64 Collect Linker Optimization Hint (LOH)", false, | 
 | 220 |                       false) | 
 | 221 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) | 
 | 222 | INITIALIZE_PASS_END(ARM64CollectLOH, "arm64-collect-loh", | 
 | 223 |                     "ARM64 Collect Linker Optimization Hint (LOH)", false, | 
 | 224 |                     false) | 
 | 225 |  | 
 | 226 | /// Given a couple (MBB, reg) get the corresponding set of instruction from | 
 | 227 | /// the given "sets". | 
 | 228 | /// If this couple does not reference any set, an empty set is added to "sets" | 
 | 229 | /// for this couple and returned. | 
 | 230 | /// \param nbRegs is used internally allocate some memory. It must be consistent | 
 | 231 | /// with the way sets is used. | 
 | 232 | static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets, | 
 | 233 |                                  const MachineBasicBlock *MBB, unsigned reg, | 
 | 234 |                                  unsigned nbRegs) { | 
 | 235 |   SetOfMachineInstr *result; | 
 | 236 |   BlockToSetOfInstrsPerColor::iterator it = sets.find(MBB); | 
 | 237 |   if (it != sets.end()) { | 
 | 238 |     result = it->second; | 
 | 239 |   } else { | 
 | 240 |     result = sets[MBB] = new SetOfMachineInstr[nbRegs]; | 
 | 241 |   } | 
 | 242 |  | 
 | 243 |   return result[reg]; | 
 | 244 | } | 
 | 245 |  | 
 | 246 | /// Given a couple (reg, MI) get the corresponding set of instructions from the | 
 | 247 | /// the given "sets". | 
 | 248 | /// This is used to get the uses record in sets of a definition identified by | 
 | 249 | /// MI and reg, i.e., MI defines reg. | 
 | 250 | /// If the couple does not reference anything, an empty set is added to | 
 | 251 | /// "sets[reg]". | 
 | 252 | /// \pre set[reg] is valid. | 
 | 253 | static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg, | 
 | 254 |                                   const MachineInstr *MI) { | 
 | 255 |   return sets[reg][MI]; | 
 | 256 | } | 
 | 257 |  | 
 | 258 | /// Same as getUses but does not modify the input map: sets. | 
 | 259 | /// \return NULL if the couple (reg, MI) is not in sets. | 
 | 260 | static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg, | 
 | 261 |                                         const MachineInstr *MI) { | 
 | 262 |   InstrToInstrs::const_iterator Res = sets[reg].find(MI); | 
 | 263 |   if (Res != sets[reg].end()) | 
 | 264 |     return &(Res->second); | 
 | 265 |   return NULL; | 
 | 266 | } | 
 | 267 |  | 
 | 268 | /// Initialize the reaching definition algorithm: | 
 | 269 | /// For each basic block BB in MF, record: | 
 | 270 | /// - its kill set. | 
 | 271 | /// - its reachable uses (uses that are exposed to BB's predecessors). | 
 | 272 | /// - its the generated definitions. | 
 | 273 | /// \param DummyOp if not NULL, specifies a Dummy Operation to be added to | 
 | 274 | /// the list of uses of exposed defintions. | 
 | 275 | /// \param ADRPMode specifies to only consider ADRP instructions for generated | 
 | 276 | /// definition. It also consider definitions of ADRP instructions as uses and | 
 | 277 | /// ignore other uses. The ADRPMode is used to collect the information for LHO | 
 | 278 | /// that involve ADRP operation only. | 
 | 279 | static void initReachingDef(MachineFunction *MF, | 
 | 280 |                             InstrToInstrs *ColorOpToReachedUses, | 
 | 281 |                             BlockToInstrPerColor &Gen, BlockToRegSet &Kill, | 
 | 282 |                             BlockToSetOfInstrsPerColor &ReachableUses, | 
 | 283 |                             const MapRegToId &RegToId, | 
 | 284 |                             const MachineInstr *DummyOp, bool ADRPMode) { | 
 | 285 |   const TargetMachine &TM = MF->getTarget(); | 
 | 286 |   const TargetRegisterInfo *TRI = TM.getRegisterInfo(); | 
 | 287 |  | 
 | 288 |   unsigned NbReg = RegToId.size(); | 
 | 289 |  | 
 | 290 |   for (MachineFunction::const_iterator IMBB = MF->begin(), IMBBEnd = MF->end(); | 
 | 291 |        IMBB != IMBBEnd; ++IMBB) { | 
 | 292 |     const MachineBasicBlock *MBB = &(*IMBB); | 
 | 293 |     const MachineInstr **&BBGen = Gen[MBB]; | 
 | 294 |     BBGen = new const MachineInstr *[NbReg]; | 
 | 295 |     memset(BBGen, 0, sizeof(const MachineInstr *) * NbReg); | 
 | 296 |  | 
 | 297 |     BitVector &BBKillSet = Kill[MBB]; | 
 | 298 |     BBKillSet.resize(NbReg); | 
 | 299 |     for (MachineBasicBlock::const_iterator II = MBB->begin(), IEnd = MBB->end(); | 
 | 300 |          II != IEnd; ++II) { | 
 | 301 |       bool IsADRP = II->getOpcode() == ARM64::ADRP; | 
 | 302 |  | 
 | 303 |       // Process uses first. | 
 | 304 |       if (IsADRP || !ADRPMode) | 
 | 305 |         for (MachineInstr::const_mop_iterator IO = II->operands_begin(), | 
 | 306 |                                               IOEnd = II->operands_end(); | 
 | 307 |              IO != IOEnd; ++IO) { | 
 | 308 |           // Treat ADRP def as use, as the goal of the analysis is to find | 
 | 309 |           // ADRP defs reached by other ADRP defs. | 
 | 310 |           if (!IO->isReg() || (!ADRPMode && !IO->isUse()) || | 
 | 311 |               (ADRPMode && (!IsADRP || !IO->isDef()))) | 
 | 312 |             continue; | 
 | 313 |           unsigned CurReg = IO->getReg(); | 
 | 314 |           MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); | 
 | 315 |           if (ItCurRegId == RegToId.end()) | 
 | 316 |             continue; | 
 | 317 |           CurReg = ItCurRegId->second; | 
 | 318 |  | 
 | 319 |           // if CurReg has not been defined, this use is reachable. | 
 | 320 |           if (!BBGen[CurReg] && !BBKillSet.test(CurReg)) | 
 | 321 |             getSet(ReachableUses, MBB, CurReg, NbReg).insert(&(*II)); | 
 | 322 |           // current basic block definition for this color, if any, is in Gen. | 
 | 323 |           if (BBGen[CurReg]) | 
 | 324 |             getUses(ColorOpToReachedUses, CurReg, BBGen[CurReg]).insert(&(*II)); | 
 | 325 |         } | 
 | 326 |  | 
 | 327 |       // Process clobbers. | 
 | 328 |       for (MachineInstr::const_mop_iterator IO = II->operands_begin(), | 
 | 329 |                                             IOEnd = II->operands_end(); | 
 | 330 |            IO != IOEnd; ++IO) { | 
 | 331 |         if (!IO->isRegMask()) | 
 | 332 |           continue; | 
 | 333 |         // Clobbers kill the related colors. | 
 | 334 |         const uint32_t *PreservedRegs = IO->getRegMask(); | 
 | 335 |  | 
 | 336 |         // Set generated regs. | 
 | 337 |         for (const auto Entry : RegToId) { | 
 | 338 |           unsigned Reg = Entry.second; | 
 | 339 |           // Use the global register ID when querying APIs external to this | 
 | 340 |           // pass. | 
 | 341 |           if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) { | 
 | 342 |             // Do not register clobbered definition for no ADRP. | 
 | 343 |             // This definition is not used anyway (otherwise register | 
 | 344 |             // allocation is wrong). | 
 | 345 |             BBGen[Reg] = ADRPMode ? II : NULL; | 
 | 346 |             BBKillSet.set(Reg); | 
 | 347 |           } | 
 | 348 |         } | 
 | 349 |       } | 
 | 350 |  | 
 | 351 |       // Process defs | 
 | 352 |       for (MachineInstr::const_mop_iterator IO = II->operands_begin(), | 
 | 353 |                                             IOEnd = II->operands_end(); | 
 | 354 |            IO != IOEnd; ++IO) { | 
 | 355 |         if (!IO->isReg() || !IO->isDef()) | 
 | 356 |           continue; | 
 | 357 |         unsigned CurReg = IO->getReg(); | 
 | 358 |         MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); | 
 | 359 |         if (ItCurRegId == RegToId.end()) | 
 | 360 |           continue; | 
 | 361 |  | 
 | 362 |         for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { | 
 | 363 |           MapRegToId::const_iterator ItRegId = RegToId.find(*AI); | 
 | 364 |           assert(ItRegId != RegToId.end() && | 
 | 365 |                  "Sub-register of an " | 
 | 366 |                  "involved register, not recorded as involved!"); | 
 | 367 |           BBKillSet.set(ItRegId->second); | 
 | 368 |           BBGen[ItRegId->second] = &(*II); | 
 | 369 |         } | 
 | 370 |         BBGen[ItCurRegId->second] = &(*II); | 
 | 371 |       } | 
 | 372 |     } | 
 | 373 |  | 
 | 374 |     // If we restrict our analysis to basic block scope, conservatively add a | 
 | 375 |     // dummy | 
 | 376 |     // use for each generated value. | 
 | 377 |     if (!ADRPMode && DummyOp && !MBB->succ_empty()) | 
 | 378 |       for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) | 
 | 379 |         if (BBGen[CurReg]) | 
 | 380 |           getUses(ColorOpToReachedUses, CurReg, BBGen[CurReg]).insert(DummyOp); | 
 | 381 |   } | 
 | 382 | } | 
 | 383 |  | 
 | 384 | /// Reaching def core algorithm: | 
 | 385 | /// while an Out has changed | 
 | 386 | ///    for each bb | 
 | 387 | ///       for each color | 
 | 388 | ///           In[bb][color] = U Out[bb.predecessors][color] | 
 | 389 | ///           insert reachableUses[bb][color] in each in[bb][color] | 
 | 390 | ///                 op.reachedUses | 
 | 391 | /// | 
 | 392 | ///           Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) | 
 | 393 | static void reachingDefAlgorithm(MachineFunction *MF, | 
 | 394 |                                  InstrToInstrs *ColorOpToReachedUses, | 
 | 395 |                                  BlockToSetOfInstrsPerColor &In, | 
 | 396 |                                  BlockToSetOfInstrsPerColor &Out, | 
 | 397 |                                  BlockToInstrPerColor &Gen, BlockToRegSet &Kill, | 
 | 398 |                                  BlockToSetOfInstrsPerColor &ReachableUses, | 
 | 399 |                                  unsigned NbReg) { | 
 | 400 |   bool HasChanged; | 
 | 401 |   do { | 
 | 402 |     HasChanged = false; | 
 | 403 |     for (MachineFunction::const_iterator IMBB = MF->begin(), | 
 | 404 |                                          IMBBEnd = MF->end(); | 
 | 405 |          IMBB != IMBBEnd; ++IMBB) { | 
 | 406 |       const MachineBasicBlock *MBB = &(*IMBB); | 
 | 407 |       unsigned CurReg; | 
 | 408 |       for (CurReg = 0; CurReg < NbReg; ++CurReg) { | 
 | 409 |         SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg); | 
 | 410 |         SetOfMachineInstr &BBReachableUses = | 
 | 411 |             getSet(ReachableUses, MBB, CurReg, NbReg); | 
 | 412 |         SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg); | 
 | 413 |         unsigned Size = BBOutSet.size(); | 
 | 414 |         //   In[bb][color] = U Out[bb.predecessors][color] | 
 | 415 |         for (MachineBasicBlock::const_pred_iterator | 
 | 416 |                  PredMBB = MBB->pred_begin(), | 
 | 417 |                  EndPredMBB = MBB->pred_end(); | 
 | 418 |              PredMBB != EndPredMBB; ++PredMBB) { | 
 | 419 |           SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg); | 
 | 420 |           BBInSet.insert(PredOutSet.begin(), PredOutSet.end()); | 
 | 421 |         } | 
 | 422 |         //   insert reachableUses[bb][color] in each in[bb][color] op.reachedses | 
 | 423 |         for (const MachineInstr *MI: BBInSet) { | 
 | 424 |           SetOfMachineInstr &OpReachedUses = | 
 | 425 |               getUses(ColorOpToReachedUses, CurReg, MI); | 
 | 426 |           OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end()); | 
 | 427 |         } | 
 | 428 |         //           Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) | 
 | 429 |         if (!Kill[MBB].test(CurReg)) | 
 | 430 |           BBOutSet.insert(BBInSet.begin(), BBInSet.end()); | 
 | 431 |         if (Gen[MBB][CurReg]) | 
 | 432 |           BBOutSet.insert(Gen[MBB][CurReg]); | 
 | 433 |         HasChanged |= BBOutSet.size() != Size; | 
 | 434 |       } | 
 | 435 |     } | 
 | 436 |   } while (HasChanged); | 
 | 437 | } | 
 | 438 |  | 
 | 439 | /// Release all memory dynamically allocated during the reaching | 
 | 440 | /// definition algorithm. | 
 | 441 | static void finitReachingDef(BlockToSetOfInstrsPerColor &In, | 
 | 442 |                              BlockToSetOfInstrsPerColor &Out, | 
 | 443 |                              BlockToInstrPerColor &Gen, | 
 | 444 |                              BlockToSetOfInstrsPerColor &ReachableUses) { | 
 | 445 |   for (BlockToSetOfInstrsPerColor::const_iterator IT = Out.begin(), | 
 | 446 |                                                   End = Out.end(); | 
 | 447 |        IT != End; ++IT) | 
 | 448 |     delete[] IT->second; | 
 | 449 |   for (BlockToSetOfInstrsPerColor::const_iterator IT = In.begin(), | 
 | 450 |                                                   End = In.end(); | 
 | 451 |        IT != End; ++IT) | 
 | 452 |     delete[] IT->second; | 
 | 453 |   for (BlockToSetOfInstrsPerColor::const_iterator IT = ReachableUses.begin(), | 
 | 454 |                                                   End = ReachableUses.end(); | 
 | 455 |        IT != End; ++IT) | 
 | 456 |     delete[] IT->second; | 
 | 457 |   for (BlockToInstrPerColor::const_iterator IT = Gen.begin(), End = Gen.end(); | 
 | 458 |        IT != End; ++IT) | 
 | 459 |     delete[] IT->second; | 
 | 460 | } | 
 | 461 |  | 
 | 462 | /// Reaching definiton algorithm. | 
 | 463 | /// \param MF function on which the algorithm will operate. | 
 | 464 | /// \param[out] ColorOpToReachedUses will contain the result of the reaching | 
 | 465 | /// def algorithm. | 
 | 466 | /// \param ADRPMode specify whether the reaching def algorithm should be tuned | 
 | 467 | /// for ADRP optimization. \see initReachingDef for more details. | 
 | 468 | /// \param DummyOp if not NULL, the algorithm will work at | 
 | 469 | /// basic block scope and will set for every exposed defintion a use to | 
 | 470 | /// @p DummyOp. | 
 | 471 | /// \pre ColorOpToReachedUses is an array of at least number of registers of | 
 | 472 | /// InstrToInstrs. | 
 | 473 | static void reachingDef(MachineFunction *MF, | 
 | 474 |                         InstrToInstrs *ColorOpToReachedUses, | 
 | 475 |                         const MapRegToId &RegToId, bool ADRPMode = false, | 
 | 476 |                         const MachineInstr *DummyOp = NULL) { | 
 | 477 |   // structures: | 
 | 478 |   // For each basic block. | 
 | 479 |   // Out: a set per color of definitions that reach the | 
 | 480 |   //      out boundary of this block. | 
 | 481 |   // In: Same as Out but for in boundary. | 
 | 482 |   // Gen: generated color in this block (one operation per color). | 
 | 483 |   // Kill: register set of killed color in this block. | 
 | 484 |   // ReachableUses: a set per color of uses (operation) reachable | 
 | 485 |   //                for "In" definitions. | 
 | 486 |   BlockToSetOfInstrsPerColor Out, In, ReachableUses; | 
 | 487 |   BlockToInstrPerColor Gen; | 
 | 488 |   BlockToRegSet Kill; | 
 | 489 |  | 
 | 490 |   // Initialize Gen, kill and reachableUses. | 
 | 491 |   initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId, | 
 | 492 |                   DummyOp, ADRPMode); | 
 | 493 |  | 
 | 494 |   // Algo. | 
 | 495 |   if (!DummyOp) | 
 | 496 |     reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill, | 
 | 497 |                          ReachableUses, RegToId.size()); | 
 | 498 |  | 
 | 499 |   // finit. | 
 | 500 |   finitReachingDef(In, Out, Gen, ReachableUses); | 
 | 501 | } | 
 | 502 |  | 
 | 503 | #ifndef NDEBUG | 
 | 504 | /// print the result of the reaching definition algorithm. | 
 | 505 | static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses, | 
 | 506 |                              unsigned NbReg, const TargetRegisterInfo *TRI, | 
 | 507 |                              const MapIdToReg &IdToReg) { | 
 | 508 |   unsigned CurReg; | 
 | 509 |   for (CurReg = 0; CurReg < NbReg; ++CurReg) { | 
 | 510 |     if (ColorOpToReachedUses[CurReg].empty()) | 
 | 511 |       continue; | 
 | 512 |     DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n"); | 
 | 513 |  | 
 | 514 |     InstrToInstrs::const_iterator DefsIt = ColorOpToReachedUses[CurReg].begin(); | 
 | 515 |     InstrToInstrs::const_iterator DefsItEnd = | 
 | 516 |         ColorOpToReachedUses[CurReg].end(); | 
 | 517 |     for (; DefsIt != DefsItEnd; ++DefsIt) { | 
 | 518 |       DEBUG(dbgs() << "Def:\n"); | 
 | 519 |       DEBUG(DefsIt->first->print(dbgs())); | 
 | 520 |       DEBUG(dbgs() << "Reachable uses:\n"); | 
 | 521 |       for (SetOfMachineInstr::const_iterator UsesIt = DefsIt->second.begin(), | 
 | 522 |                                              UsesItEnd = DefsIt->second.end(); | 
 | 523 |            UsesIt != UsesItEnd; ++UsesIt) { | 
 | 524 |         DEBUG((*UsesIt)->print(dbgs())); | 
 | 525 |       } | 
 | 526 |     } | 
 | 527 |   } | 
 | 528 | } | 
 | 529 | #endif // NDEBUG | 
 | 530 |  | 
 | 531 | /// Answer the following question: Can Def be one of the definition | 
 | 532 | /// involved in a part of a LOH? | 
 | 533 | static bool canDefBePartOfLOH(const MachineInstr *Def) { | 
 | 534 |   unsigned Opc = Def->getOpcode(); | 
 | 535 |   // Accept ADRP, ADDLow and LOADGot. | 
 | 536 |   switch (Opc) { | 
 | 537 |   default: | 
 | 538 |     return false; | 
 | 539 |   case ARM64::ADRP: | 
 | 540 |     return true; | 
 | 541 |   case ARM64::ADDXri: | 
 | 542 |     // Check immediate to see if the immediate is an address. | 
 | 543 |     switch (Def->getOperand(2).getType()) { | 
 | 544 |     default: | 
 | 545 |       return false; | 
 | 546 |     case MachineOperand::MO_GlobalAddress: | 
 | 547 |     case MachineOperand::MO_JumpTableIndex: | 
 | 548 |     case MachineOperand::MO_ConstantPoolIndex: | 
 | 549 |     case MachineOperand::MO_BlockAddress: | 
 | 550 |       return true; | 
 | 551 |     } | 
 | 552 |   case ARM64::LDRXui: | 
 | 553 |     // Check immediate to see if the immediate is an address. | 
 | 554 |     switch (Def->getOperand(2).getType()) { | 
 | 555 |     default: | 
 | 556 |       return false; | 
 | 557 |     case MachineOperand::MO_GlobalAddress: | 
 | 558 |       return true; | 
 | 559 |     } | 
 | 560 |   } | 
 | 561 |   // Unreachable. | 
 | 562 |   return false; | 
 | 563 | } | 
 | 564 |  | 
 | 565 | /// Check whether the given instruction can the end of a LOH chain involving a | 
 | 566 | /// store. | 
 | 567 | static bool isCandidateStore(const MachineInstr *Instr) { | 
 | 568 |   switch (Instr->getOpcode()) { | 
 | 569 |   default: | 
 | 570 |     return false; | 
 | 571 |   case ARM64::STRBui: | 
 | 572 |   case ARM64::STRHui: | 
 | 573 |   case ARM64::STRWui: | 
 | 574 |   case ARM64::STRXui: | 
 | 575 |   case ARM64::STRSui: | 
 | 576 |   case ARM64::STRDui: | 
 | 577 |   case ARM64::STRQui: | 
 | 578 |     // In case we have str xA, [xA, #imm], this is two different uses | 
 | 579 |     // of xA and we cannot fold, otherwise the xA stored may be wrong, | 
 | 580 |     // even if #imm == 0. | 
 | 581 |     if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg()) | 
 | 582 |       return true; | 
 | 583 |   } | 
 | 584 |   return false; | 
 | 585 | } | 
 | 586 |  | 
 | 587 | /// Given the result of a reaching defintion algorithm in ColorOpToReachedUses, | 
 | 588 | /// Build the Use to Defs information and filter out obvious non-LOH candidates. | 
 | 589 | /// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions. | 
 | 590 | /// In non-ADRPMode, non-LOH candidates are "uses" with several definition, | 
 | 591 | /// i.e., no simple chain. | 
 | 592 | /// \param ADRPMode -- \see initReachingDef. | 
 | 593 | static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs, | 
 | 594 |                               const InstrToInstrs *ColorOpToReachedUses, | 
 | 595 |                               const MapRegToId &RegToId, | 
 | 596 |                               bool ADRPMode = false) { | 
 | 597 |  | 
 | 598 |   SetOfMachineInstr NotCandidate; | 
 | 599 |   unsigned NbReg = RegToId.size(); | 
 | 600 |   MapRegToId::const_iterator EndIt = RegToId.end(); | 
 | 601 |   for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) { | 
 | 602 |     // If this color is never defined, continue. | 
 | 603 |     if (ColorOpToReachedUses[CurReg].empty()) | 
 | 604 |       continue; | 
 | 605 |  | 
 | 606 |     InstrToInstrs::const_iterator DefsIt = ColorOpToReachedUses[CurReg].begin(); | 
 | 607 |     InstrToInstrs::const_iterator DefsItEnd = | 
 | 608 |         ColorOpToReachedUses[CurReg].end(); | 
 | 609 |     for (; DefsIt != DefsItEnd; ++DefsIt) { | 
 | 610 |       for (SetOfMachineInstr::const_iterator UsesIt = DefsIt->second.begin(), | 
 | 611 |                                              UsesItEnd = DefsIt->second.end(); | 
 | 612 |            UsesIt != UsesItEnd; ++UsesIt) { | 
 | 613 |         const MachineInstr *Def = DefsIt->first; | 
 | 614 |         MapRegToId::const_iterator It; | 
 | 615 |         // if all the reaching defs are not adrp, this use will not be | 
 | 616 |         // simplifiable. | 
 | 617 |         if ((ADRPMode && Def->getOpcode() != ARM64::ADRP) || | 
 | 618 |             (!ADRPMode && !canDefBePartOfLOH(Def)) || | 
 | 619 |             (!ADRPMode && isCandidateStore(*UsesIt) && | 
 | 620 |              // store are LOH candidate iff the end of the chain is used as | 
 | 621 |              // base. | 
 | 622 |              ((It = RegToId.find((*UsesIt)->getOperand(1).getReg())) == EndIt || | 
 | 623 |               It->second != CurReg))) { | 
 | 624 |           NotCandidate.insert(*UsesIt); | 
 | 625 |           continue; | 
 | 626 |         } | 
 | 627 |         // Do not consider self reaching as a simplifiable case for ADRP. | 
 | 628 |         if (!ADRPMode || *UsesIt != DefsIt->first) { | 
 | 629 |           UseToReachingDefs[*UsesIt].insert(DefsIt->first); | 
 | 630 |           // If UsesIt has several reaching definitions, it is not | 
 | 631 |           // candidate for simplificaton in non-ADRPMode. | 
 | 632 |           if (!ADRPMode && UseToReachingDefs[*UsesIt].size() > 1) | 
 | 633 |             NotCandidate.insert(*UsesIt); | 
 | 634 |         } | 
 | 635 |       } | 
 | 636 |     } | 
 | 637 |   } | 
 | 638 |   for (const MachineInstr *Elem : NotCandidate) { | 
 | 639 |     DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n"); | 
 | 640 |     // It would have been better if we could just remove the entry | 
 | 641 |     // from the map.  Because of that, we have to filter the garbage | 
 | 642 |     // (second.empty) in the subsequence analysis. | 
 | 643 |     UseToReachingDefs[Elem].clear(); | 
 | 644 |   } | 
 | 645 | } | 
 | 646 |  | 
 | 647 | /// Based on the use to defs information (in ADRPMode), compute the | 
 | 648 | /// opportunities of LOH ADRP-related. | 
 | 649 | static void computeADRP(const InstrToInstrs &UseToDefs, | 
 | 650 |                         ARM64FunctionInfo &ARM64FI, | 
 | 651 |                         const MachineDominatorTree *MDT) { | 
 | 652 |   DEBUG(dbgs() << "*** Compute LOH for ADRP\n"); | 
 | 653 |   for (const auto &Entry: UseToDefs) { | 
 | 654 |     unsigned Size = Entry.second.size(); | 
 | 655 |     if (Size == 0) | 
 | 656 |       continue; | 
 | 657 |     if (Size == 1) { | 
 | 658 |       const MachineInstr *L2 = *Entry.second.begin(); | 
 | 659 |       const MachineInstr *L1 = Entry.first; | 
 | 660 |       if (!MDT->dominates(L2, L1)) { | 
 | 661 |         DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1 | 
 | 662 |                      << '\n'); | 
 | 663 |         continue; | 
 | 664 |       } | 
 | 665 |       DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n'); | 
 | 666 |       SmallVector<const MachineInstr *, 2> Args; | 
 | 667 |       Args.push_back(L2); | 
 | 668 |       Args.push_back(L1); | 
 | 669 |       ARM64FI.addLOHDirective(MCLOH_AdrpAdrp, Args); | 
 | 670 |       ++NumADRPSimpleCandidate; | 
 | 671 |     } | 
 | 672 | #ifdef DEBUG | 
 | 673 |     else if (Size == 2) | 
 | 674 |       ++NumADRPComplexCandidate2; | 
 | 675 |     else if (Size == 3) | 
 | 676 |       ++NumADRPComplexCandidate3; | 
 | 677 |     else | 
 | 678 |       ++NumADRPComplexCandidateOther; | 
 | 679 | #endif | 
 | 680 |     // if Size < 1, the use should have been removed from the candidates | 
 | 681 |     assert(Size >= 1 && "No reaching defs for that use!"); | 
 | 682 |   } | 
 | 683 | } | 
 | 684 |  | 
 | 685 | /// Check whether the given instruction can be the end of a LOH chain | 
 | 686 | /// involving a load. | 
 | 687 | static bool isCandidateLoad(const MachineInstr *Instr) { | 
 | 688 |   switch (Instr->getOpcode()) { | 
 | 689 |   default: | 
 | 690 |     return false; | 
 | 691 |   case ARM64::LDRSBWui: | 
 | 692 |   case ARM64::LDRSBXui: | 
 | 693 |   case ARM64::LDRSHWui: | 
 | 694 |   case ARM64::LDRSHXui: | 
 | 695 |   case ARM64::LDRSWui: | 
 | 696 |   case ARM64::LDRBui: | 
 | 697 |   case ARM64::LDRHui: | 
 | 698 |   case ARM64::LDRWui: | 
 | 699 |   case ARM64::LDRXui: | 
 | 700 |   case ARM64::LDRSui: | 
 | 701 |   case ARM64::LDRDui: | 
 | 702 |   case ARM64::LDRQui: | 
 | 703 |     if (Instr->getOperand(2).getTargetFlags() & ARM64II::MO_GOT) | 
 | 704 |       return false; | 
 | 705 |     return true; | 
 | 706 |   } | 
 | 707 |   // Unreachable. | 
 | 708 |   return false; | 
 | 709 | } | 
 | 710 |  | 
 | 711 | /// Check whether the given instruction can load a litteral. | 
 | 712 | static bool supportLoadFromLiteral(const MachineInstr *Instr) { | 
 | 713 |   switch (Instr->getOpcode()) { | 
 | 714 |   default: | 
 | 715 |     return false; | 
 | 716 |   case ARM64::LDRSWui: | 
 | 717 |   case ARM64::LDRWui: | 
 | 718 |   case ARM64::LDRXui: | 
 | 719 |   case ARM64::LDRSui: | 
 | 720 |   case ARM64::LDRDui: | 
 | 721 |   case ARM64::LDRQui: | 
 | 722 |     return true; | 
 | 723 |   } | 
 | 724 |   // Unreachable. | 
 | 725 |   return false; | 
 | 726 | } | 
 | 727 |  | 
 | 728 | /// Check whether the given instruction is a LOH candidate. | 
 | 729 | /// \param UseToDefs is used to check that Instr is at the end of LOH supported | 
 | 730 | /// chain. | 
 | 731 | /// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are | 
 | 732 | /// already been filtered out. | 
 | 733 | static bool isCandidate(const MachineInstr *Instr, | 
 | 734 |                         const InstrToInstrs &UseToDefs, | 
 | 735 |                         const MachineDominatorTree *MDT) { | 
 | 736 |   if (!isCandidateLoad(Instr) && !isCandidateStore(Instr)) | 
 | 737 |     return false; | 
 | 738 |  | 
 | 739 |   const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin(); | 
 | 740 |   if (Def->getOpcode() != ARM64::ADRP) { | 
 | 741 |     // At this point, Def is ADDXri or LDRXui of the right type of | 
 | 742 |     // symbol, because we filtered out the uses that were not defined | 
 | 743 |     // by these kind of instructions (+ ADRP). | 
 | 744 |  | 
 | 745 |     // Check if this forms a simple chain: each intermediate node must | 
 | 746 |     // dominates the next one. | 
 | 747 |     if (!MDT->dominates(Def, Instr)) | 
 | 748 |       return false; | 
 | 749 |     // Move one node up in the simple chain. | 
 | 750 |     if (UseToDefs.find(Def) == UseToDefs.end() | 
 | 751 |                                // The map may contain garbage we have to ignore. | 
 | 752 |         || | 
 | 753 |         UseToDefs.find(Def)->second.empty()) | 
 | 754 |       return false; | 
 | 755 |     Instr = Def; | 
 | 756 |     Def = *UseToDefs.find(Def)->second.begin(); | 
 | 757 |   } | 
 | 758 |   // Check if we reached the top of the simple chain: | 
 | 759 |   // - top is ADRP. | 
 | 760 |   // - check the simple chain property: each intermediate node must | 
 | 761 |   // dominates the next one. | 
 | 762 |   if (Def->getOpcode() == ARM64::ADRP) | 
 | 763 |     return MDT->dominates(Def, Instr); | 
 | 764 |   return false; | 
 | 765 | } | 
 | 766 |  | 
 | 767 | static bool registerADRCandidate(const MachineInstr *Use, | 
 | 768 |                                  const InstrToInstrs &UseToDefs, | 
 | 769 |                                  const InstrToInstrs *DefsPerColorToUses, | 
 | 770 |                                  ARM64FunctionInfo &ARM64FI, | 
 | 771 |                                  SetOfMachineInstr *InvolvedInLOHs, | 
 | 772 |                                  const MapRegToId &RegToId) { | 
 | 773 |   // Look for opportunities to turn ADRP -> ADD or | 
 | 774 |   // ADRP -> LDR GOTPAGEOFF into ADR. | 
 | 775 |   // If ADRP has more than one use. Give up. | 
 | 776 |   if (Use->getOpcode() != ARM64::ADDXri && | 
 | 777 |       (Use->getOpcode() != ARM64::LDRXui || | 
 | 778 |        !(Use->getOperand(2).getTargetFlags() & ARM64II::MO_GOT))) | 
 | 779 |     return false; | 
 | 780 |   InstrToInstrs::const_iterator It = UseToDefs.find(Use); | 
 | 781 |   // The map may contain garbage that we need to ignore. | 
 | 782 |   if (It == UseToDefs.end() || It->second.empty()) | 
 | 783 |     return false; | 
 | 784 |   const MachineInstr *Def = *It->second.begin(); | 
 | 785 |   if (Def->getOpcode() != ARM64::ADRP) | 
 | 786 |     return false; | 
 | 787 |   // Check the number of users of ADRP. | 
 | 788 |   const SetOfMachineInstr *Users = | 
 | 789 |       getUses(DefsPerColorToUses, | 
 | 790 |               RegToId.find(Def->getOperand(0).getReg())->second, Def); | 
 | 791 |   if (Users->size() > 1) { | 
 | 792 |     ++NumADRComplexCandidate; | 
 | 793 |     return false; | 
 | 794 |   } | 
 | 795 |   ++NumADRSimpleCandidate; | 
 | 796 |   assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Def)) && | 
 | 797 |          "ADRP already involved in LOH."); | 
 | 798 |   assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Use)) && | 
 | 799 |          "ADD already involved in LOH."); | 
 | 800 |   DEBUG(dbgs() << "Record AdrpAdd\n" << *Def << '\n' << *Use << '\n'); | 
 | 801 |  | 
 | 802 |   SmallVector<const MachineInstr *, 2> Args; | 
 | 803 |   Args.push_back(Def); | 
 | 804 |   Args.push_back(Use); | 
 | 805 |  | 
 | 806 |   ARM64FI.addLOHDirective(Use->getOpcode() == ARM64::ADDXri ? MCLOH_AdrpAdd | 
 | 807 |                                                             : MCLOH_AdrpLdrGot, | 
 | 808 |                           Args); | 
 | 809 |   return true; | 
 | 810 | } | 
 | 811 |  | 
 | 812 | /// Based on the use to defs information (in non-ADRPMode), compute the | 
 | 813 | /// opportunities of LOH non-ADRP-related | 
 | 814 | static void computeOthers(const InstrToInstrs &UseToDefs, | 
 | 815 |                           const InstrToInstrs *DefsPerColorToUses, | 
 | 816 |                           ARM64FunctionInfo &ARM64FI, const MapRegToId &RegToId, | 
 | 817 |                           const MachineDominatorTree *MDT) { | 
 | 818 |   SetOfMachineInstr *InvolvedInLOHs = NULL; | 
 | 819 | #ifdef DEBUG | 
 | 820 |   SetOfMachineInstr InvolvedInLOHsStorage; | 
 | 821 |   InvolvedInLOHs = &InvolvedInLOHsStorage; | 
 | 822 | #endif // DEBUG | 
 | 823 |   DEBUG(dbgs() << "*** Compute LOH for Others\n"); | 
 | 824 |   // ADRP -> ADD/LDR -> LDR/STR pattern. | 
 | 825 |   // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern. | 
 | 826 |  | 
 | 827 |   // FIXME: When the statistics are not important, | 
 | 828 |   // This initial filtering loop can be merged into the next loop. | 
 | 829 |   // Currently, we didn't do it to have the same code for both DEBUG and | 
 | 830 |   // NDEBUG builds. Indeed, the iterator of the second loop would need | 
 | 831 |   // to be changed. | 
 | 832 |   SetOfMachineInstr PotentialCandidates; | 
 | 833 |   SetOfMachineInstr PotentialADROpportunities; | 
 | 834 |   for (InstrToInstrs::const_iterator UseIt = UseToDefs.begin(), | 
 | 835 |                                      EndUseIt = UseToDefs.end(); | 
 | 836 |        UseIt != EndUseIt; ++UseIt) { | 
 | 837 |     // If no definition is available, this is a non candidate. | 
 | 838 |     if (UseIt->second.empty()) | 
 | 839 |       continue; | 
 | 840 |     // Keep only instructions that are load or store and at the end of | 
 | 841 |     // a ADRP -> ADD/LDR/Nothing chain. | 
 | 842 |     // We already filtered out the no-chain cases. | 
 | 843 |     if (!isCandidate(UseIt->first, UseToDefs, MDT)) { | 
 | 844 |       PotentialADROpportunities.insert(UseIt->first); | 
 | 845 |       continue; | 
 | 846 |     } | 
 | 847 |     PotentialCandidates.insert(UseIt->first); | 
 | 848 |   } | 
 | 849 |  | 
 | 850 |   // Make the following distinctions for statistics as the linker does | 
 | 851 |   // know how to decode instructions: | 
 | 852 |   // - ADD/LDR/Nothing make there different patterns. | 
 | 853 |   // - LDR/STR make two different patterns. | 
 | 854 |   // Hence, 6 - 1 base patterns. | 
 | 855 |   // (because ADRP-> Nothing -> STR is not simplifiable) | 
 | 856 |  | 
 | 857 |   // The linker is only able to have a simple semantic, i.e., if pattern A | 
 | 858 |   // do B. | 
 | 859 |   // However, we want to see the opportunity we may miss if we were able to | 
 | 860 |   // catch more complex cases. | 
 | 861 |  | 
 | 862 |   // PotentialCandidates are result of a chain ADRP -> ADD/LDR -> | 
 | 863 |   // A potential candidate becomes a candidate, if its current immediate | 
 | 864 |   // operand is zero and all nodes of the chain have respectively only one user | 
 | 865 |   SetOfMachineInstr::const_iterator CandidateIt, EndCandidateIt; | 
 | 866 | #ifdef DEBUG | 
 | 867 |   SetOfMachineInstr DefsOfPotentialCandidates; | 
 | 868 | #endif | 
 | 869 |   for (CandidateIt = PotentialCandidates.begin(), | 
 | 870 |       EndCandidateIt = PotentialCandidates.end(); | 
 | 871 |        CandidateIt != EndCandidateIt; ++CandidateIt) { | 
 | 872 |     const MachineInstr *Candidate = *CandidateIt; | 
 | 873 |     // Get the definition of the candidate i.e., ADD or LDR. | 
 | 874 |     const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin(); | 
 | 875 |     // Record the elements of the chain. | 
 | 876 |     const MachineInstr *L1 = Def; | 
 | 877 |     const MachineInstr *L2 = NULL; | 
 | 878 |     unsigned ImmediateDefOpc = Def->getOpcode(); | 
 | 879 |     if (Def->getOpcode() != ARM64::ADRP) { | 
 | 880 |       // Check the number of users of this node. | 
 | 881 |       const SetOfMachineInstr *Users = | 
 | 882 |           getUses(DefsPerColorToUses, | 
 | 883 |                   RegToId.find(Def->getOperand(0).getReg())->second, Def); | 
 | 884 |       if (Users->size() > 1) { | 
 | 885 | #ifdef DEBUG | 
 | 886 |         // if all the uses of this def are in potential candidate, this is | 
 | 887 |         // a complex candidate of level 2. | 
 | 888 |         SetOfMachineInstr::const_iterator UseIt = Users->begin(); | 
 | 889 |         SetOfMachineInstr::const_iterator EndUseIt = Users->end(); | 
 | 890 |         for (; UseIt != EndUseIt; ++UseIt) { | 
 | 891 |           if (!PotentialCandidates.count(*UseIt)) { | 
 | 892 |             ++NumTooCplxLvl2; | 
 | 893 |             break; | 
 | 894 |           } | 
 | 895 |         } | 
 | 896 |         if (UseIt == EndUseIt) | 
 | 897 |           ++NumCplxLvl2; | 
 | 898 | #endif // DEBUG | 
 | 899 |         PotentialADROpportunities.insert(Def); | 
 | 900 |         continue; | 
 | 901 |       } | 
 | 902 |       L2 = Def; | 
 | 903 |       Def = *UseToDefs.find(Def)->second.begin(); | 
 | 904 |       L1 = Def; | 
 | 905 |     } // else the element in the middle of the chain is nothing, thus | 
 | 906 |       // Def already contains the first element of the chain. | 
 | 907 |  | 
 | 908 |     // Check the number of users of the first node in the chain, i.e., ADRP | 
 | 909 |     const SetOfMachineInstr *Users = | 
 | 910 |         getUses(DefsPerColorToUses, | 
 | 911 |                 RegToId.find(Def->getOperand(0).getReg())->second, Def); | 
 | 912 |     if (Users->size() > 1) { | 
 | 913 | #ifdef DEBUG | 
 | 914 |       // if all the uses of this def are in the defs of the potential candidate, | 
 | 915 |       // this is a complex candidate of level 1 | 
 | 916 |       if (DefsOfPotentialCandidates.empty()) { | 
 | 917 |         // lazy init | 
 | 918 |         DefsOfPotentialCandidates = PotentialCandidates; | 
 | 919 |         for (const MachineInstr *Candidate : PotentialCandidates) { | 
 | 920 |           if (!UseToDefs.find(Candidate)->second.empty()) | 
 | 921 |             DefsOfPotentialCandidates.insert( | 
 | 922 |                 *UseToDefs.find(Candidate)->second.begin()); | 
 | 923 |         } | 
 | 924 |       } | 
 | 925 |       bool Found = false; | 
 | 926 |       for (auto &Use: *Users) { | 
 | 927 |         if (!DefsOfPotentialCandidates.count(Use)) { | 
 | 928 |           ++NumTooCplxLvl1; | 
 | 929 |           Found = true; | 
 | 930 |           break; | 
 | 931 |         } | 
 | 932 |       } | 
 | 933 |       if (!Found) | 
 | 934 |         ++NumCplxLvl1; | 
 | 935 | #endif // DEBUG | 
 | 936 |       continue; | 
 | 937 |     } | 
 | 938 |  | 
 | 939 |     bool IsL2Add = (ImmediateDefOpc == ARM64::ADDXri); | 
 | 940 |     // If the chain is three instructions long and ldr is the second element, | 
 | 941 |     // then this ldr must load form GOT, otherwise this is not a correct chain. | 
 | 942 |     if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != ARM64II::MO_GOT) | 
 | 943 |       continue; | 
 | 944 |     SmallVector<const MachineInstr *, 3> Args; | 
 | 945 |     MCLOHType Kind; | 
 | 946 |     if (isCandidateLoad(Candidate)) { | 
 | 947 |       if (L2 == NULL) { | 
 | 948 |         // At this point, the candidate LOH indicates that the ldr instruction | 
 | 949 |         // may use a direct access to the symbol. There is not such encoding | 
 | 950 |         // for loads of byte and half. | 
 | 951 |         if (!supportLoadFromLiteral(Candidate)) | 
 | 952 |           continue; | 
 | 953 |  | 
 | 954 |         DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate | 
 | 955 |                      << '\n'); | 
 | 956 |         Kind = MCLOH_AdrpLdr; | 
 | 957 |         Args.push_back(L1); | 
 | 958 |         Args.push_back(Candidate); | 
 | 959 |         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && | 
 | 960 |                "L1 already involved in LOH."); | 
 | 961 |         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && | 
 | 962 |                "Candidate already involved in LOH."); | 
 | 963 |         ++NumADRPToLDR; | 
 | 964 |       } else { | 
 | 965 |         DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") | 
 | 966 |                      << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate | 
 | 967 |                      << '\n'); | 
 | 968 |  | 
 | 969 |         Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr; | 
 | 970 |         Args.push_back(L1); | 
 | 971 |         Args.push_back(L2); | 
 | 972 |         Args.push_back(Candidate); | 
 | 973 |  | 
 | 974 |         PotentialADROpportunities.remove(L2); | 
 | 975 |         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && | 
 | 976 |                "L1 already involved in LOH."); | 
 | 977 |         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && | 
 | 978 |                "L2 already involved in LOH."); | 
 | 979 |         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && | 
 | 980 |                "Candidate already involved in LOH."); | 
 | 981 | #ifdef DEBUG | 
 | 982 |         // get the immediate of the load | 
 | 983 |         if (Candidate->getOperand(2).getImm() == 0) | 
 | 984 |           if (ImmediateDefOpc == ARM64::ADDXri) | 
 | 985 |             ++NumADDToLDR; | 
 | 986 |           else | 
 | 987 |             ++NumLDRToLDR; | 
 | 988 |         else if (ImmediateDefOpc == ARM64::ADDXri) | 
 | 989 |           ++NumADDToLDRWithImm; | 
 | 990 |         else | 
 | 991 |           ++NumLDRToLDRWithImm; | 
 | 992 | #endif // DEBUG | 
 | 993 |       } | 
 | 994 |     } else { | 
 | 995 |       if (ImmediateDefOpc == ARM64::ADRP) | 
 | 996 |         continue; | 
 | 997 |       else { | 
 | 998 |  | 
 | 999 |         DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") | 
 | 1000 |                      << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate | 
 | 1001 |                      << '\n'); | 
 | 1002 |  | 
 | 1003 |         Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr; | 
 | 1004 |         Args.push_back(L1); | 
 | 1005 |         Args.push_back(L2); | 
 | 1006 |         Args.push_back(Candidate); | 
 | 1007 |  | 
 | 1008 |         PotentialADROpportunities.remove(L2); | 
 | 1009 |         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && | 
 | 1010 |                "L1 already involved in LOH."); | 
 | 1011 |         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && | 
 | 1012 |                "L2 already involved in LOH."); | 
 | 1013 |         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && | 
 | 1014 |                "Candidate already involved in LOH."); | 
 | 1015 | #ifdef DEBUG | 
 | 1016 |         // get the immediate of the store | 
 | 1017 |         if (Candidate->getOperand(2).getImm() == 0) | 
 | 1018 |           if (ImmediateDefOpc == ARM64::ADDXri) | 
 | 1019 |             ++NumADDToSTR; | 
 | 1020 |           else | 
 | 1021 |             ++NumLDRToSTR; | 
 | 1022 |         else if (ImmediateDefOpc == ARM64::ADDXri) | 
 | 1023 |           ++NumADDToSTRWithImm; | 
 | 1024 |         else | 
 | 1025 |           ++NumLDRToSTRWithImm; | 
 | 1026 | #endif // DEBUG | 
 | 1027 |       } | 
 | 1028 |     } | 
 | 1029 |     ARM64FI.addLOHDirective(Kind, Args); | 
 | 1030 |   } | 
 | 1031 |  | 
 | 1032 |   // Now, we grabbed all the big patterns, check ADR opportunities. | 
 | 1033 |   for (const MachineInstr *Candidate: PotentialADROpportunities) | 
 | 1034 |     registerADRCandidate(Candidate, UseToDefs, DefsPerColorToUses, ARM64FI, | 
 | 1035 |                          InvolvedInLOHs, RegToId); | 
 | 1036 | } | 
 | 1037 |  | 
 | 1038 | /// Look for every register defined by potential LOHs candidates. | 
 | 1039 | /// Map these registers with dense id in @p RegToId and vice-versa in | 
 | 1040 | /// @p IdToReg. @p IdToReg is populated only in DEBUG mode. | 
 | 1041 | static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId, | 
 | 1042 |                                MapIdToReg &IdToReg, | 
 | 1043 |                                const TargetRegisterInfo *TRI) { | 
 | 1044 |   unsigned CurRegId = 0; | 
 | 1045 |   if (!PreCollectRegister) { | 
 | 1046 |     unsigned NbReg = TRI->getNumRegs(); | 
 | 1047 |     for (; CurRegId < NbReg; ++CurRegId) { | 
 | 1048 |       RegToId[CurRegId] = CurRegId; | 
 | 1049 |       DEBUG(IdToReg.push_back(CurRegId)); | 
 | 1050 |       DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches")); | 
 | 1051 |     } | 
 | 1052 |     return; | 
 | 1053 |   } | 
 | 1054 |  | 
 | 1055 |   DEBUG(dbgs() << "** Collect Involved Register\n"); | 
 | 1056 |   for (MachineFunction::const_iterator IMBB = MF.begin(), IMBBEnd = MF.end(); | 
 | 1057 |        IMBB != IMBBEnd; ++IMBB) | 
 | 1058 |     for (MachineBasicBlock::const_iterator II = IMBB->begin(), | 
 | 1059 |                                            IEnd = IMBB->end(); | 
 | 1060 |          II != IEnd; ++II) { | 
 | 1061 |  | 
 | 1062 |       if (!canDefBePartOfLOH(II)) | 
 | 1063 |         continue; | 
 | 1064 |  | 
 | 1065 |       // Process defs | 
 | 1066 |       for (MachineInstr::const_mop_iterator IO = II->operands_begin(), | 
 | 1067 |                                             IOEnd = II->operands_end(); | 
 | 1068 |            IO != IOEnd; ++IO) { | 
 | 1069 |         if (!IO->isReg() || !IO->isDef()) | 
 | 1070 |           continue; | 
 | 1071 |         unsigned CurReg = IO->getReg(); | 
 | 1072 |         for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) | 
 | 1073 |           if (RegToId.find(*AI) == RegToId.end()) { | 
 | 1074 |             DEBUG(IdToReg.push_back(*AI); | 
 | 1075 |                   assert(IdToReg[CurRegId] == *AI && | 
 | 1076 |                          "Reg index mismatches insertion index.")); | 
 | 1077 |             RegToId[*AI] = CurRegId++; | 
 | 1078 |             DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n'); | 
 | 1079 |           } | 
 | 1080 |       } | 
 | 1081 |     } | 
 | 1082 | } | 
 | 1083 |  | 
 | 1084 | bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &Fn) { | 
 | 1085 |   const TargetMachine &TM = Fn.getTarget(); | 
 | 1086 |   const TargetRegisterInfo *TRI = TM.getRegisterInfo(); | 
 | 1087 |   const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>(); | 
 | 1088 |  | 
 | 1089 |   MapRegToId RegToId; | 
 | 1090 |   MapIdToReg IdToReg; | 
 | 1091 |   ARM64FunctionInfo *ARM64FI = Fn.getInfo<ARM64FunctionInfo>(); | 
 | 1092 |   assert(ARM64FI && "No MachineFunctionInfo for this function!"); | 
 | 1093 |  | 
 | 1094 |   DEBUG(dbgs() << "Looking for LOH in " << Fn.getName() << '\n'); | 
 | 1095 |  | 
 | 1096 |   collectInvolvedReg(Fn, RegToId, IdToReg, TRI); | 
 | 1097 |   if (RegToId.empty()) | 
 | 1098 |     return false; | 
 | 1099 |  | 
 | 1100 |   MachineInstr *DummyOp = NULL; | 
 | 1101 |   if (BasicBlockScopeOnly) { | 
 | 1102 |     const ARM64InstrInfo *TII = | 
 | 1103 |         static_cast<const ARM64InstrInfo *>(TM.getInstrInfo()); | 
 | 1104 |     // For local analysis, create a dummy operation to record uses that are not | 
 | 1105 |     // local. | 
 | 1106 |     DummyOp = Fn.CreateMachineInstr(TII->get(ARM64::COPY), DebugLoc()); | 
 | 1107 |   } | 
 | 1108 |  | 
 | 1109 |   unsigned NbReg = RegToId.size(); | 
 | 1110 |   bool Modified = false; | 
 | 1111 |  | 
 | 1112 |   // Start with ADRP. | 
 | 1113 |   InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg]; | 
 | 1114 |  | 
 | 1115 |   // Compute the reaching def in ADRP mode, meaning ADRP definitions | 
 | 1116 |   // are first considered as uses. | 
 | 1117 |   reachingDef(&Fn, ColorOpToReachedUses, RegToId, true, DummyOp); | 
 | 1118 |   DEBUG(dbgs() << "ADRP reaching defs\n"); | 
 | 1119 |   DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); | 
 | 1120 |  | 
 | 1121 |   // Translate the definition to uses map into a use to definitions map to ease | 
 | 1122 |   // statistic computation. | 
 | 1123 |   InstrToInstrs ADRPToReachingDefs; | 
 | 1124 |   reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true); | 
 | 1125 |  | 
 | 1126 |   // Compute LOH for ADRP. | 
 | 1127 |   computeADRP(ADRPToReachingDefs, *ARM64FI, MDT); | 
 | 1128 |   delete[] ColorOpToReachedUses; | 
 | 1129 |  | 
 | 1130 |   // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern. | 
 | 1131 |   ColorOpToReachedUses = new InstrToInstrs[NbReg]; | 
 | 1132 |  | 
 | 1133 |   // first perform a regular reaching def analysis. | 
 | 1134 |   reachingDef(&Fn, ColorOpToReachedUses, RegToId, false, DummyOp); | 
 | 1135 |   DEBUG(dbgs() << "All reaching defs\n"); | 
 | 1136 |   DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); | 
 | 1137 |  | 
 | 1138 |   // Turn that into a use to defs to ease statistic computation. | 
 | 1139 |   InstrToInstrs UsesToReachingDefs; | 
 | 1140 |   reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false); | 
 | 1141 |  | 
 | 1142 |   // Compute other than AdrpAdrp LOH. | 
 | 1143 |   computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *ARM64FI, RegToId, | 
 | 1144 |                 MDT); | 
 | 1145 |   delete[] ColorOpToReachedUses; | 
 | 1146 |  | 
 | 1147 |   if (BasicBlockScopeOnly) | 
 | 1148 |     Fn.DeleteMachineInstr(DummyOp); | 
 | 1149 |  | 
 | 1150 |   return Modified; | 
 | 1151 | } | 
 | 1152 |  | 
 | 1153 | /// createARM64CollectLOHPass - returns an instance of the Statistic for | 
 | 1154 | /// linker optimization pass. | 
 | 1155 | FunctionPass *llvm::createARM64CollectLOHPass() { | 
 | 1156 |   return new ARM64CollectLOH(); | 
 | 1157 | } |