blob: a6926a6700e187299d9de480eb9b4a6e1625bdab [file] [log] [blame]
Evandro Menezes94edf022017-02-01 02:54:34 +00001//===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// \file This file contains the AArch64 implementation of the DAG scheduling mutation
11// to pair instructions back to back.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64MacroFusion.h"
16#include "AArch64Subtarget.h"
Evandro Menezesa8d33012017-02-21 22:16:13 +000017#include "llvm/ADT/Statistic.h"
Evandro Menezes94edf022017-02-01 02:54:34 +000018#include "llvm/Support/CommandLine.h"
19#include "llvm/Target/TargetInstrInfo.h"
20
21#define DEBUG_TYPE "misched"
22
Evandro Menezesa8d33012017-02-21 22:16:13 +000023STATISTIC(NumFused, "Number of instr pairs fused");
24
Evandro Menezes94edf022017-02-01 02:54:34 +000025using namespace llvm;
26
27static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden,
28 cl::desc("Enable scheduling for macro fusion."), cl::init(true));
29
30namespace {
31
Evandro Menezes203eef02017-04-11 19:13:11 +000032/// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused
33/// together. Given an anchor instr, when the other instr is unspecified, then
34/// check if the anchor instr may be part of a fused pair at all.
35static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
36 const TargetSubtargetInfo &TSI,
37 const MachineInstr *FirstMI,
38 const MachineInstr *SecondMI) {
39 assert((FirstMI || SecondMI) && "At least one instr must be specified");
40
41 const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII);
42 const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI);
43
44 // Assume wildcards for unspecified instrs.
Simon Pilgrimb0921662017-02-18 22:50:28 +000045 unsigned FirstOpcode =
Evandro Menezes203eef02017-04-11 19:13:11 +000046 FirstMI ? FirstMI->getOpcode()
47 : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
Simon Pilgrimb0921662017-02-18 22:50:28 +000048 unsigned SecondOpcode =
Evandro Menezes203eef02017-04-11 19:13:11 +000049 SecondMI ? SecondMI->getOpcode()
50 : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
Evandro Menezes94edf022017-02-01 02:54:34 +000051
52 if (ST.hasArithmeticBccFusion())
53 // Fuse CMN, CMP, TST followed by Bcc.
54 if (SecondOpcode == AArch64::Bcc)
55 switch (FirstOpcode) {
56 default:
57 return false;
58 case AArch64::ADDSWri:
59 case AArch64::ADDSWrr:
60 case AArch64::ADDSXri:
61 case AArch64::ADDSXrr:
62 case AArch64::ANDSWri:
63 case AArch64::ANDSWrr:
64 case AArch64::ANDSXri:
65 case AArch64::ANDSXrr:
66 case AArch64::SUBSWri:
67 case AArch64::SUBSWrr:
68 case AArch64::SUBSXri:
69 case AArch64::SUBSXrr:
70 case AArch64::BICSWrr:
71 case AArch64::BICSXrr:
72 return true;
73 case AArch64::ADDSWrs:
74 case AArch64::ADDSXrs:
75 case AArch64::ANDSWrs:
76 case AArch64::ANDSXrs:
77 case AArch64::SUBSWrs:
78 case AArch64::SUBSXrs:
79 case AArch64::BICSWrs:
80 case AArch64::BICSXrs:
81 // Shift value can be 0 making these behave like the "rr" variant...
Evandro Menezes203eef02017-04-11 19:13:11 +000082 return !II.hasShiftedReg(*FirstMI);
Evandro Menezes94edf022017-02-01 02:54:34 +000083 case AArch64::INSTRUCTION_LIST_END:
84 return true;
85 }
86
87 if (ST.hasArithmeticCbzFusion())
88 // Fuse ALU operations followed by CBZ/CBNZ.
89 if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
90 SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX)
91 switch (FirstOpcode) {
92 default:
93 return false;
94 case AArch64::ADDWri:
95 case AArch64::ADDWrr:
96 case AArch64::ADDXri:
97 case AArch64::ADDXrr:
98 case AArch64::ANDWri:
99 case AArch64::ANDWrr:
100 case AArch64::ANDXri:
101 case AArch64::ANDXrr:
102 case AArch64::EORWri:
103 case AArch64::EORWrr:
104 case AArch64::EORXri:
105 case AArch64::EORXrr:
106 case AArch64::ORRWri:
107 case AArch64::ORRWrr:
108 case AArch64::ORRXri:
109 case AArch64::ORRXrr:
110 case AArch64::SUBWri:
111 case AArch64::SUBWrr:
112 case AArch64::SUBXri:
113 case AArch64::SUBXrr:
114 return true;
115 case AArch64::ADDWrs:
116 case AArch64::ADDXrs:
117 case AArch64::ANDWrs:
118 case AArch64::ANDXrs:
119 case AArch64::SUBWrs:
120 case AArch64::SUBXrs:
121 case AArch64::BICWrs:
122 case AArch64::BICXrs:
123 // Shift value can be 0 making these behave like the "rr" variant...
Evandro Menezes203eef02017-04-11 19:13:11 +0000124 return !II.hasShiftedReg(*FirstMI);
Evandro Menezes94edf022017-02-01 02:54:34 +0000125 case AArch64::INSTRUCTION_LIST_END:
126 return true;
127 }
128
Evandro Menezesb21fb292017-02-01 02:54:39 +0000129 if (ST.hasFuseAES())
130 // Fuse AES crypto operations.
131 switch(FirstOpcode) {
132 // AES encode.
133 case AArch64::AESErr:
134 return SecondOpcode == AArch64::AESMCrr ||
135 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
136 // AES decode.
137 case AArch64::AESDrr:
138 return SecondOpcode == AArch64::AESIMCrr ||
139 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
140 }
141
Evandro Menezes455382e2017-02-01 02:54:42 +0000142 if (ST.hasFuseLiterals())
143 // Fuse literal generation operations.
144 switch (FirstOpcode) {
145 // PC relative address.
146 case AArch64::ADRP:
147 return SecondOpcode == AArch64::ADDXri ||
148 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
149 // 32 bit immediate.
150 case AArch64::MOVZWi:
151 return (SecondOpcode == AArch64::MOVKWi &&
Evandro Menezes203eef02017-04-11 19:13:11 +0000152 SecondMI->getOperand(3).getImm() == 16) ||
Evandro Menezes455382e2017-02-01 02:54:42 +0000153 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
154 // Lower half of 64 bit immediate.
155 case AArch64::MOVZXi:
156 return (SecondOpcode == AArch64::MOVKXi &&
Evandro Menezes203eef02017-04-11 19:13:11 +0000157 SecondMI->getOperand(3).getImm() == 16) ||
Evandro Menezes455382e2017-02-01 02:54:42 +0000158 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
159 // Upper half of 64 bit immediate.
160 case AArch64::MOVKXi:
Evandro Menezes203eef02017-04-11 19:13:11 +0000161 return FirstMI->getOperand(3).getImm() == 32 &&
Evandro Menezes455382e2017-02-01 02:54:42 +0000162 ((SecondOpcode == AArch64::MOVKXi &&
Evandro Menezes203eef02017-04-11 19:13:11 +0000163 SecondMI->getOperand(3).getImm() == 48) ||
Evandro Menezes455382e2017-02-01 02:54:42 +0000164 SecondOpcode == AArch64::INSTRUCTION_LIST_END);
165 }
166
Evandro Menezes94edf022017-02-01 02:54:34 +0000167 return false;
168}
169
Evandro Menezes203eef02017-04-11 19:13:11 +0000170/// \brief Implement the fusion of instr pairs in the scheduling DAG,
171/// anchored at the instr in AnchorSU..
172static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) {
173 const MachineInstr *AnchorMI = AnchorSU.getInstr();
174 if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient())
Evandro Menezes94edf022017-02-01 02:54:34 +0000175 return false;
176
Evandro Menezes203eef02017-04-11 19:13:11 +0000177 // If the anchor instr is the ExitSU, then consider its predecessors;
178 // otherwise, its successors.
179 bool Preds = (&AnchorSU == &DAG->ExitSU);
180 SmallVectorImpl<SDep> &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs;
181
182 const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI;
183 const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr;
184
185 // Check if the anchor instr may be fused.
186 if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
187 FirstMI, SecondMI))
188 return false;
189
190 // Explorer for fusion candidates among the dependencies of the anchor instr.
191 for (SDep &Dep : AnchorDeps) {
192 // Ignore dependencies that don't enforce ordering.
193 if (Dep.isWeak())
Evandro Menezes94edf022017-02-01 02:54:34 +0000194 continue;
195
Evandro Menezes203eef02017-04-11 19:13:11 +0000196 SUnit &DepSU = *Dep.getSUnit();
197 // Ignore the ExitSU if the dependents are successors.
198 if (!Preds && &DepSU == &DAG->ExitSU)
Evandro Menezes94edf022017-02-01 02:54:34 +0000199 continue;
200
Evandro Menezes203eef02017-04-11 19:13:11 +0000201 const MachineInstr *DepMI = DepSU.getInstr();
202 if (!DepMI || DepMI->isPseudo() || DepMI->isTransient())
203 continue;
Evandro Menezes94edf022017-02-01 02:54:34 +0000204
Evandro Menezes203eef02017-04-11 19:13:11 +0000205 FirstMI = Preds ? DepMI : AnchorMI;
206 SecondMI = Preds ? AnchorMI : DepMI;
207 if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
208 FirstMI, SecondMI))
209 continue;
Evandro Menezes94edf022017-02-01 02:54:34 +0000210
Evandro Menezes203eef02017-04-11 19:13:11 +0000211 // Create a single weak edge between the adjacent instrs. The only effect is
212 // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
213 SUnit &FirstSU = Preds ? DepSU : AnchorSU;
214 SUnit &SecondSU = Preds ? AnchorSU : DepSU;
215 DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster));
216
217 // Adjust the latency between the anchor instr and its
218 // predecessors/successors.
219 for (SDep &IDep : AnchorDeps)
220 if (IDep.getSUnit() == &DepSU)
221 IDep.setLatency(0);
222
223 // Adjust the latency between the dependent instr and its
224 // successors/predecessors.
225 for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds)
226 if (IDep.getSUnit() == &AnchorSU)
227 IDep.setLatency(0);
228
229 DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse ";
230 FirstSU.print(dbgs(), DAG); dbgs() << " - ";
231 SecondSU.print(dbgs(), DAG); dbgs() << " / ";
232 dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " <<
233 DAG->TII->getName(SecondMI->getOpcode()) << '\n'; );
Evandro Menezes94edf022017-02-01 02:54:34 +0000234
Evandro Menezesa8d33012017-02-21 22:16:13 +0000235 ++NumFused;
Evandro Menezes94edf022017-02-01 02:54:34 +0000236 return true;
237 }
238
239 return false;
240}
241
Evandro Menezes203eef02017-04-11 19:13:11 +0000242/// \brief Post-process the DAG to create cluster edges between instrs that may
243/// be fused by the processor into a single operation.
Evandro Menezes94edf022017-02-01 02:54:34 +0000244class AArch64MacroFusion : public ScheduleDAGMutation {
245public:
246 AArch64MacroFusion() {}
247
248 void apply(ScheduleDAGInstrs *DAGInstrs) override;
249};
250
251void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
252 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
253
Evandro Menezes203eef02017-04-11 19:13:11 +0000254 // For each of the SUnits in the scheduling block, try to fuse the instr in it
255 // with one in its successors.
256 for (SUnit &ISU : DAG->SUnits)
257 scheduleAdjacentImpl(DAG, ISU);
Evandro Menezes94edf022017-02-01 02:54:34 +0000258
Evandro Menezes203eef02017-04-11 19:13:11 +0000259 // Try to fuse the instr in the ExitSU with one in its predecessors.
260 scheduleAdjacentImpl(DAG, DAG->ExitSU);
Evandro Menezes94edf022017-02-01 02:54:34 +0000261}
262
263} // end namespace
264
265
266namespace llvm {
267
268std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation () {
269 return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr;
270}
271
272} // end namespace llvm