blob: a6965e8403b00ade3630d7bc70a9d4f892adf142 [file] [log] [blame]
//===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// \file This file contains the AArch64 implementation of the DAG scheduling
// mutation to pair instructions back to back.
//
//===----------------------------------------------------------------------===//
14
15#include "AArch64MacroFusion.h"
16#include "AArch64Subtarget.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Target/TargetInstrInfo.h"
19
20#define DEBUG_TYPE "misched"
21
22using namespace llvm;
23
24static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden,
25 cl::desc("Enable scheduling for macro fusion."), cl::init(true));
26
27namespace {
28
NAKAMURA Takumi468487d2017-02-01 07:30:46 +000029/// \brief Verify that the instruction pair, First and Second,
Evandro Menezes94edf022017-02-01 02:54:34 +000030/// should be scheduled back to back. Given an anchor instruction, if the other
31/// instruction is unspecified, then verify that the anchor instruction may be
32/// part of a pair at all.
33static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
34 const AArch64Subtarget &ST,
35 const MachineInstr *First,
36 const MachineInstr *Second) {
Simon Pilgrimb0921662017-02-18 22:50:28 +000037 unsigned FirstOpcode =
Davide Italiano1aef59e2017-02-19 21:31:14 +000038 First ? First->getOpcode()
39 : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
Simon Pilgrimb0921662017-02-18 22:50:28 +000040 unsigned SecondOpcode =
Davide Italiano1aef59e2017-02-19 21:31:14 +000041 Second ? Second->getOpcode()
42 : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
Evandro Menezes94edf022017-02-01 02:54:34 +000043
44 if (ST.hasArithmeticBccFusion())
45 // Fuse CMN, CMP, TST followed by Bcc.
46 if (SecondOpcode == AArch64::Bcc)
47 switch (FirstOpcode) {
48 default:
49 return false;
50 case AArch64::ADDSWri:
51 case AArch64::ADDSWrr:
52 case AArch64::ADDSXri:
53 case AArch64::ADDSXrr:
54 case AArch64::ANDSWri:
55 case AArch64::ANDSWrr:
56 case AArch64::ANDSXri:
57 case AArch64::ANDSXrr:
58 case AArch64::SUBSWri:
59 case AArch64::SUBSWrr:
60 case AArch64::SUBSXri:
61 case AArch64::SUBSXrr:
62 case AArch64::BICSWrr:
63 case AArch64::BICSXrr:
64 return true;
65 case AArch64::ADDSWrs:
66 case AArch64::ADDSXrs:
67 case AArch64::ANDSWrs:
68 case AArch64::ANDSXrs:
69 case AArch64::SUBSWrs:
70 case AArch64::SUBSXrs:
71 case AArch64::BICSWrs:
72 case AArch64::BICSXrs:
73 // Shift value can be 0 making these behave like the "rr" variant...
74 return !TII.hasShiftedReg(*First);
75 case AArch64::INSTRUCTION_LIST_END:
76 return true;
77 }
78
79 if (ST.hasArithmeticCbzFusion())
80 // Fuse ALU operations followed by CBZ/CBNZ.
81 if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
82 SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX)
83 switch (FirstOpcode) {
84 default:
85 return false;
86 case AArch64::ADDWri:
87 case AArch64::ADDWrr:
88 case AArch64::ADDXri:
89 case AArch64::ADDXrr:
90 case AArch64::ANDWri:
91 case AArch64::ANDWrr:
92 case AArch64::ANDXri:
93 case AArch64::ANDXrr:
94 case AArch64::EORWri:
95 case AArch64::EORWrr:
96 case AArch64::EORXri:
97 case AArch64::EORXrr:
98 case AArch64::ORRWri:
99 case AArch64::ORRWrr:
100 case AArch64::ORRXri:
101 case AArch64::ORRXrr:
102 case AArch64::SUBWri:
103 case AArch64::SUBWrr:
104 case AArch64::SUBXri:
105 case AArch64::SUBXrr:
106 return true;
107 case AArch64::ADDWrs:
108 case AArch64::ADDXrs:
109 case AArch64::ANDWrs:
110 case AArch64::ANDXrs:
111 case AArch64::SUBWrs:
112 case AArch64::SUBXrs:
113 case AArch64::BICWrs:
114 case AArch64::BICXrs:
115 // Shift value can be 0 making these behave like the "rr" variant...
116 return !TII.hasShiftedReg(*First);
117 case AArch64::INSTRUCTION_LIST_END:
118 return true;
119 }
120
Evandro Menezesb21fb292017-02-01 02:54:39 +0000121 if (ST.hasFuseAES())
122 // Fuse AES crypto operations.
123 switch(FirstOpcode) {
124 // AES encode.
125 case AArch64::AESErr:
126 return SecondOpcode == AArch64::AESMCrr ||
127 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
128 // AES decode.
129 case AArch64::AESDrr:
130 return SecondOpcode == AArch64::AESIMCrr ||
131 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
132 }
133
Evandro Menezes455382e2017-02-01 02:54:42 +0000134 if (ST.hasFuseLiterals())
135 // Fuse literal generation operations.
136 switch (FirstOpcode) {
137 // PC relative address.
138 case AArch64::ADRP:
139 return SecondOpcode == AArch64::ADDXri ||
140 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
141 // 32 bit immediate.
142 case AArch64::MOVZWi:
143 return (SecondOpcode == AArch64::MOVKWi &&
144 Second->getOperand(3).getImm() == 16) ||
145 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
146 // Lower half of 64 bit immediate.
147 case AArch64::MOVZXi:
148 return (SecondOpcode == AArch64::MOVKXi &&
149 Second->getOperand(3).getImm() == 16) ||
150 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
151 // Upper half of 64 bit immediate.
152 case AArch64::MOVKXi:
153 return First->getOperand(3).getImm() == 32 &&
154 ((SecondOpcode == AArch64::MOVKXi &&
155 Second->getOperand(3).getImm() == 48) ||
156 SecondOpcode == AArch64::INSTRUCTION_LIST_END);
157 }
158
Evandro Menezes94edf022017-02-01 02:54:34 +0000159 return false;
160}
161
162/// \brief Implement the fusion of instruction pairs in the scheduling
NAKAMURA Takumi468487d2017-02-01 07:30:46 +0000163/// DAG, anchored at the instruction in ASU. Preds
Evandro Menezes94edf022017-02-01 02:54:34 +0000164/// indicates if its dependencies in \param APreds are predecessors instead of
165/// successors.
166static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit *ASU,
167 SmallVectorImpl<SDep> &APreds, bool Preds) {
168 const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(DAG->TII);
169 const AArch64Subtarget &ST = DAG->MF.getSubtarget<AArch64Subtarget>();
170
171 const MachineInstr *AMI = ASU->getInstr();
172 if (!AMI || AMI->isPseudo() || AMI->isTransient() ||
173 (Preds && !shouldScheduleAdjacent(*TII, ST, nullptr, AMI)) ||
174 (!Preds && !shouldScheduleAdjacent(*TII, ST, AMI, nullptr)))
175 return false;
176
177 for (SDep &BDep : APreds) {
178 if (BDep.isWeak())
179 continue;
180
181 SUnit *BSU = BDep.getSUnit();
182 const MachineInstr *BMI = BSU->getInstr();
183 if (!BMI || BMI->isPseudo() || BMI->isTransient() ||
184 (Preds && !shouldScheduleAdjacent(*TII, ST, BMI, AMI)) ||
185 (!Preds && !shouldScheduleAdjacent(*TII, ST, AMI, BMI)))
186 continue;
187
188 // Create a single weak edge between the adjacent instrs. The only
189 // effect is to cause bottom-up scheduling to heavily prioritize the
190 // clustered instrs.
191 if (Preds)
192 DAG->addEdge(ASU, SDep(BSU, SDep::Cluster));
193 else
194 DAG->addEdge(BSU, SDep(ASU, SDep::Cluster));
195
196 // Adjust the latency between the 1st instr and its predecessors/successors.
197 for (SDep &Dep : APreds)
198 if (Dep.getSUnit() == BSU)
199 Dep.setLatency(0);
200
201 // Adjust the latency between the 2nd instr and its successors/predecessors.
202 auto &BSuccs = Preds ? BSU->Succs : BSU->Preds;
203 for (SDep &Dep : BSuccs)
204 if (Dep.getSUnit() == ASU)
205 Dep.setLatency(0);
206
207 DEBUG(dbgs() << "Macro fuse ";
208 Preds ? BSU->print(dbgs(), DAG) : ASU->print(dbgs(), DAG);
209 dbgs() << " - ";
210 Preds ? ASU->print(dbgs(), DAG) : BSU->print(dbgs(), DAG);
211 dbgs() << '\n');
212
213 return true;
214 }
215
216 return false;
217}
218
219/// \brief Post-process the DAG to create cluster edges between instructions
220/// that may be fused by the processor into a single operation.
221class AArch64MacroFusion : public ScheduleDAGMutation {
222public:
223 AArch64MacroFusion() {}
224
225 void apply(ScheduleDAGInstrs *DAGInstrs) override;
226};
227
228void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
229 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
230
231 // For each of the SUnits in the scheduling block, try to fuse the instruction
232 // in it with one in its successors.
233 for (SUnit &ASU : DAG->SUnits)
234 scheduleAdjacentImpl(DAG, &ASU, ASU.Succs, false);
235
236 // Try to fuse the instruction in the ExitSU with one in its predecessors.
237 scheduleAdjacentImpl(DAG, &DAG->ExitSU, DAG->ExitSU.Preds, true);
238}
239
240} // end namespace
241
242
243namespace llvm {
244
245std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation () {
246 return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr;
247}
248
249} // end namespace llvm