Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 1 | //===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // \file This file contains the AArch64 implementation of the DAG scheduling mutation |
| 11 | // to pair instructions back to back. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "AArch64MacroFusion.h" |
| 16 | #include "AArch64Subtarget.h" |
Evandro Menezes | a8d3301 | 2017-02-21 22:16:13 +0000 | [diff] [blame] | 17 | #include "llvm/ADT/Statistic.h" |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 18 | #include "llvm/Support/CommandLine.h" |
| 19 | #include "llvm/Target/TargetInstrInfo.h" |
| 20 | |
| 21 | #define DEBUG_TYPE "misched" |
| 22 | |
Evandro Menezes | a8d3301 | 2017-02-21 22:16:13 +0000 | [diff] [blame] | 23 | STATISTIC(NumFused, "Number of instr pairs fused"); |
| 24 | |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 25 | using namespace llvm; |
| 26 | |
| 27 | static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden, |
| 28 | cl::desc("Enable scheduling for macro fusion."), cl::init(true)); |
| 29 | |
| 30 | namespace { |
| 31 | |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 32 | /// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused |
| 33 | /// together. Given an anchor instr, when the other instr is unspecified, then |
| 34 | /// check if the anchor instr may be part of a fused pair at all. |
| 35 | static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, |
| 36 | const TargetSubtargetInfo &TSI, |
| 37 | const MachineInstr *FirstMI, |
| 38 | const MachineInstr *SecondMI) { |
| 39 | assert((FirstMI || SecondMI) && "At least one instr must be specified"); |
| 40 | |
| 41 | const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII); |
| 42 | const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI); |
| 43 | |
| 44 | // Assume wildcards for unspecified instrs. |
Simon Pilgrim | b092166 | 2017-02-18 22:50:28 +0000 | [diff] [blame] | 45 | unsigned FirstOpcode = |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 46 | FirstMI ? FirstMI->getOpcode() |
| 47 | : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END); |
Simon Pilgrim | b092166 | 2017-02-18 22:50:28 +0000 | [diff] [blame] | 48 | unsigned SecondOpcode = |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 49 | SecondMI ? SecondMI->getOpcode() |
| 50 | : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END); |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 51 | |
| 52 | if (ST.hasArithmeticBccFusion()) |
| 53 | // Fuse CMN, CMP, TST followed by Bcc. |
| 54 | if (SecondOpcode == AArch64::Bcc) |
| 55 | switch (FirstOpcode) { |
| 56 | default: |
| 57 | return false; |
| 58 | case AArch64::ADDSWri: |
| 59 | case AArch64::ADDSWrr: |
| 60 | case AArch64::ADDSXri: |
| 61 | case AArch64::ADDSXrr: |
| 62 | case AArch64::ANDSWri: |
| 63 | case AArch64::ANDSWrr: |
| 64 | case AArch64::ANDSXri: |
| 65 | case AArch64::ANDSXrr: |
| 66 | case AArch64::SUBSWri: |
| 67 | case AArch64::SUBSWrr: |
| 68 | case AArch64::SUBSXri: |
| 69 | case AArch64::SUBSXrr: |
| 70 | case AArch64::BICSWrr: |
| 71 | case AArch64::BICSXrr: |
| 72 | return true; |
| 73 | case AArch64::ADDSWrs: |
| 74 | case AArch64::ADDSXrs: |
| 75 | case AArch64::ANDSWrs: |
| 76 | case AArch64::ANDSXrs: |
| 77 | case AArch64::SUBSWrs: |
| 78 | case AArch64::SUBSXrs: |
| 79 | case AArch64::BICSWrs: |
| 80 | case AArch64::BICSXrs: |
| 81 | // Shift value can be 0 making these behave like the "rr" variant... |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 82 | return !II.hasShiftedReg(*FirstMI); |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 83 | case AArch64::INSTRUCTION_LIST_END: |
| 84 | return true; |
| 85 | } |
| 86 | |
| 87 | if (ST.hasArithmeticCbzFusion()) |
| 88 | // Fuse ALU operations followed by CBZ/CBNZ. |
| 89 | if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX || |
| 90 | SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) |
| 91 | switch (FirstOpcode) { |
| 92 | default: |
| 93 | return false; |
| 94 | case AArch64::ADDWri: |
| 95 | case AArch64::ADDWrr: |
| 96 | case AArch64::ADDXri: |
| 97 | case AArch64::ADDXrr: |
| 98 | case AArch64::ANDWri: |
| 99 | case AArch64::ANDWrr: |
| 100 | case AArch64::ANDXri: |
| 101 | case AArch64::ANDXrr: |
| 102 | case AArch64::EORWri: |
| 103 | case AArch64::EORWrr: |
| 104 | case AArch64::EORXri: |
| 105 | case AArch64::EORXrr: |
| 106 | case AArch64::ORRWri: |
| 107 | case AArch64::ORRWrr: |
| 108 | case AArch64::ORRXri: |
| 109 | case AArch64::ORRXrr: |
| 110 | case AArch64::SUBWri: |
| 111 | case AArch64::SUBWrr: |
| 112 | case AArch64::SUBXri: |
| 113 | case AArch64::SUBXrr: |
| 114 | return true; |
| 115 | case AArch64::ADDWrs: |
| 116 | case AArch64::ADDXrs: |
| 117 | case AArch64::ANDWrs: |
| 118 | case AArch64::ANDXrs: |
| 119 | case AArch64::SUBWrs: |
| 120 | case AArch64::SUBXrs: |
| 121 | case AArch64::BICWrs: |
| 122 | case AArch64::BICXrs: |
| 123 | // Shift value can be 0 making these behave like the "rr" variant... |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 124 | return !II.hasShiftedReg(*FirstMI); |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 125 | case AArch64::INSTRUCTION_LIST_END: |
| 126 | return true; |
| 127 | } |
| 128 | |
Evandro Menezes | b21fb29 | 2017-02-01 02:54:39 +0000 | [diff] [blame] | 129 | if (ST.hasFuseAES()) |
| 130 | // Fuse AES crypto operations. |
| 131 | switch(FirstOpcode) { |
| 132 | // AES encode. |
| 133 | case AArch64::AESErr: |
| 134 | return SecondOpcode == AArch64::AESMCrr || |
| 135 | SecondOpcode == AArch64::INSTRUCTION_LIST_END; |
| 136 | // AES decode. |
| 137 | case AArch64::AESDrr: |
| 138 | return SecondOpcode == AArch64::AESIMCrr || |
| 139 | SecondOpcode == AArch64::INSTRUCTION_LIST_END; |
| 140 | } |
| 141 | |
Evandro Menezes | 455382e | 2017-02-01 02:54:42 +0000 | [diff] [blame] | 142 | if (ST.hasFuseLiterals()) |
| 143 | // Fuse literal generation operations. |
| 144 | switch (FirstOpcode) { |
| 145 | // PC relative address. |
| 146 | case AArch64::ADRP: |
| 147 | return SecondOpcode == AArch64::ADDXri || |
| 148 | SecondOpcode == AArch64::INSTRUCTION_LIST_END; |
| 149 | // 32 bit immediate. |
| 150 | case AArch64::MOVZWi: |
| 151 | return (SecondOpcode == AArch64::MOVKWi && |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 152 | SecondMI->getOperand(3).getImm() == 16) || |
Evandro Menezes | 455382e | 2017-02-01 02:54:42 +0000 | [diff] [blame] | 153 | SecondOpcode == AArch64::INSTRUCTION_LIST_END; |
| 154 | // Lower half of 64 bit immediate. |
| 155 | case AArch64::MOVZXi: |
| 156 | return (SecondOpcode == AArch64::MOVKXi && |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 157 | SecondMI->getOperand(3).getImm() == 16) || |
Evandro Menezes | 455382e | 2017-02-01 02:54:42 +0000 | [diff] [blame] | 158 | SecondOpcode == AArch64::INSTRUCTION_LIST_END; |
| 159 | // Upper half of 64 bit immediate. |
| 160 | case AArch64::MOVKXi: |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 161 | return FirstMI->getOperand(3).getImm() == 32 && |
Evandro Menezes | 455382e | 2017-02-01 02:54:42 +0000 | [diff] [blame] | 162 | ((SecondOpcode == AArch64::MOVKXi && |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 163 | SecondMI->getOperand(3).getImm() == 48) || |
Evandro Menezes | 455382e | 2017-02-01 02:54:42 +0000 | [diff] [blame] | 164 | SecondOpcode == AArch64::INSTRUCTION_LIST_END); |
| 165 | } |
| 166 | |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 167 | return false; |
| 168 | } |
| 169 | |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 170 | /// \brief Implement the fusion of instr pairs in the scheduling DAG, |
| 171 | /// anchored at the instr in AnchorSU.. |
| 172 | static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) { |
| 173 | const MachineInstr *AnchorMI = AnchorSU.getInstr(); |
| 174 | if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient()) |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 175 | return false; |
| 176 | |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 177 | // If the anchor instr is the ExitSU, then consider its predecessors; |
| 178 | // otherwise, its successors. |
| 179 | bool Preds = (&AnchorSU == &DAG->ExitSU); |
| 180 | SmallVectorImpl<SDep> &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs; |
| 181 | |
| 182 | const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI; |
| 183 | const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr; |
| 184 | |
| 185 | // Check if the anchor instr may be fused. |
| 186 | if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), |
| 187 | FirstMI, SecondMI)) |
| 188 | return false; |
| 189 | |
| 190 | // Explorer for fusion candidates among the dependencies of the anchor instr. |
| 191 | for (SDep &Dep : AnchorDeps) { |
| 192 | // Ignore dependencies that don't enforce ordering. |
| 193 | if (Dep.isWeak()) |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 194 | continue; |
| 195 | |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 196 | SUnit &DepSU = *Dep.getSUnit(); |
| 197 | // Ignore the ExitSU if the dependents are successors. |
| 198 | if (!Preds && &DepSU == &DAG->ExitSU) |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 199 | continue; |
| 200 | |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 201 | const MachineInstr *DepMI = DepSU.getInstr(); |
| 202 | if (!DepMI || DepMI->isPseudo() || DepMI->isTransient()) |
| 203 | continue; |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 204 | |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 205 | FirstMI = Preds ? DepMI : AnchorMI; |
| 206 | SecondMI = Preds ? AnchorMI : DepMI; |
| 207 | if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), |
| 208 | FirstMI, SecondMI)) |
| 209 | continue; |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 210 | |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 211 | // Create a single weak edge between the adjacent instrs. The only effect is |
| 212 | // to cause bottom-up scheduling to heavily prioritize the clustered instrs. |
| 213 | SUnit &FirstSU = Preds ? DepSU : AnchorSU; |
| 214 | SUnit &SecondSU = Preds ? AnchorSU : DepSU; |
| 215 | DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)); |
| 216 | |
| 217 | // Adjust the latency between the anchor instr and its |
| 218 | // predecessors/successors. |
| 219 | for (SDep &IDep : AnchorDeps) |
| 220 | if (IDep.getSUnit() == &DepSU) |
| 221 | IDep.setLatency(0); |
| 222 | |
| 223 | // Adjust the latency between the dependent instr and its |
| 224 | // successors/predecessors. |
| 225 | for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds) |
| 226 | if (IDep.getSUnit() == &AnchorSU) |
| 227 | IDep.setLatency(0); |
| 228 | |
| 229 | DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse "; |
| 230 | FirstSU.print(dbgs(), DAG); dbgs() << " - "; |
| 231 | SecondSU.print(dbgs(), DAG); dbgs() << " / "; |
| 232 | dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " << |
| 233 | DAG->TII->getName(SecondMI->getOpcode()) << '\n'; ); |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 234 | |
Evandro Menezes | a8d3301 | 2017-02-21 22:16:13 +0000 | [diff] [blame] | 235 | ++NumFused; |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 236 | return true; |
| 237 | } |
| 238 | |
| 239 | return false; |
| 240 | } |
| 241 | |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 242 | /// \brief Post-process the DAG to create cluster edges between instrs that may |
| 243 | /// be fused by the processor into a single operation. |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 244 | class AArch64MacroFusion : public ScheduleDAGMutation { |
| 245 | public: |
| 246 | AArch64MacroFusion() {} |
| 247 | |
| 248 | void apply(ScheduleDAGInstrs *DAGInstrs) override; |
| 249 | }; |
| 250 | |
| 251 | void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { |
| 252 | ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); |
| 253 | |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 254 | // For each of the SUnits in the scheduling block, try to fuse the instr in it |
| 255 | // with one in its successors. |
| 256 | for (SUnit &ISU : DAG->SUnits) |
| 257 | scheduleAdjacentImpl(DAG, ISU); |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 258 | |
Evandro Menezes | 203eef0 | 2017-04-11 19:13:11 +0000 | [diff] [blame^] | 259 | // Try to fuse the instr in the ExitSU with one in its predecessors. |
| 260 | scheduleAdjacentImpl(DAG, DAG->ExitSU); |
Evandro Menezes | 94edf02 | 2017-02-01 02:54:34 +0000 | [diff] [blame] | 261 | } |
| 262 | |
| 263 | } // end namespace |
| 264 | |
| 265 | |
| 266 | namespace llvm { |
| 267 | |
| 268 | std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation () { |
| 269 | return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr; |
| 270 | } |
| 271 | |
| 272 | } // end namespace llvm |