blob: cde2dab69b670905ff2d2b6d05c8dde2629ae937 [file] [log] [blame]
Evandro Menezes94edf022017-02-01 02:54:34 +00001//===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// \file This file contains the AArch64 implementation of the DAG scheduling mutation
11// to pair instructions back to back.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64MacroFusion.h"
16#include "AArch64Subtarget.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Target/TargetInstrInfo.h"
19
20#define DEBUG_TYPE "misched"
21
22using namespace llvm;
23
24static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden,
25 cl::desc("Enable scheduling for macro fusion."), cl::init(true));
26
27namespace {
28
/// \brief Verify that the instruction pair, First and Second,
/// should be scheduled back to back.  Given an anchor instruction, if the
/// other instruction is unspecified, then verify that the anchor instruction
/// may be part of a pair at all.
///
/// \param TII     Used only to query hasShiftedReg() on shifted-register
///                variants.
/// \param ST      Subtarget whose fusion features gate each pairing rule.
/// \param First   The leading instruction of the pair, or null if unspecified.
/// \param Second  The trailing instruction of the pair, or null if
///                unspecified.
/// \returns true if the (possibly half-specified) pair is fusible.
static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
                                   const AArch64Subtarget &ST,
                                   const MachineInstr *First,
                                   const MachineInstr *Second) {
  assert((First || Second) && "At least one instr must be specified");
  // A null instruction maps to the sentinel INSTRUCTION_LIST_END; the cases
  // below treat that sentinel as "unspecified half, assume it could match".
  unsigned FirstOpcode =
      First ? First->getOpcode()
            : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
  unsigned SecondOpcode =
      Second ? Second->getOpcode()
            : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);

  if (ST.hasArithmeticBccFusion())
    // Fuse CMN, CMP, TST followed by Bcc.
    if (SecondOpcode == AArch64::Bcc)
      switch (FirstOpcode) {
      default:
        // First is not a flag-setting instruction: nothing fuses with Bcc, so
        // reject the pair outright (the remaining rules cannot apply either).
        return false;
      case AArch64::ADDSWri:
      case AArch64::ADDSWrr:
      case AArch64::ADDSXri:
      case AArch64::ADDSXrr:
      case AArch64::ANDSWri:
      case AArch64::ANDSWrr:
      case AArch64::ANDSXri:
      case AArch64::ANDSXrr:
      case AArch64::SUBSWri:
      case AArch64::SUBSWrr:
      case AArch64::SUBSXri:
      case AArch64::SUBSXrr:
      case AArch64::BICSWrr:
      case AArch64::BICSXrr:
        return true;
      case AArch64::ADDSWrs:
      case AArch64::ADDSXrs:
      case AArch64::ANDSWrs:
      case AArch64::ANDSXrs:
      case AArch64::SUBSWrs:
      case AArch64::SUBSXrs:
      case AArch64::BICSWrs:
      case AArch64::BICSXrs:
        // Shift value can be 0 making these behave like the "rr" variant...
        return !TII.hasShiftedReg(*First);
      case AArch64::INSTRUCTION_LIST_END:
        // First unspecified: a Bcc on its own may be part of a pair.
        return true;
      }

  if (ST.hasArithmeticCbzFusion())
    // Fuse ALU operations followed by CBZ/CBNZ.
    if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
        SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX)
      switch (FirstOpcode) {
      default:
        // First is not a fusible ALU op; reject (see Bcc case above).
        return false;
      case AArch64::ADDWri:
      case AArch64::ADDWrr:
      case AArch64::ADDXri:
      case AArch64::ADDXrr:
      case AArch64::ANDWri:
      case AArch64::ANDWrr:
      case AArch64::ANDXri:
      case AArch64::ANDXrr:
      case AArch64::EORWri:
      case AArch64::EORWrr:
      case AArch64::EORXri:
      case AArch64::EORXrr:
      case AArch64::ORRWri:
      case AArch64::ORRWrr:
      case AArch64::ORRXri:
      case AArch64::ORRXrr:
      case AArch64::SUBWri:
      case AArch64::SUBWrr:
      case AArch64::SUBXri:
      case AArch64::SUBXrr:
        return true;
      case AArch64::ADDWrs:
      case AArch64::ADDXrs:
      case AArch64::ANDWrs:
      case AArch64::ANDXrs:
      case AArch64::SUBWrs:
      case AArch64::SUBXrs:
      case AArch64::BICWrs:
      case AArch64::BICXrs:
        // Shift value can be 0 making these behave like the "rr" variant...
        return !TII.hasShiftedReg(*First);
      case AArch64::INSTRUCTION_LIST_END:
        // First unspecified: CBZ/CBNZ on its own may be part of a pair.
        return true;
      }

  if (ST.hasFuseAES())
    // Fuse AES crypto operations.  Note: no default case here — a non-AES
    // First opcode falls through to the literal-fusion checks below.
    switch(FirstOpcode) {
    // AES encode.
    case AArch64::AESErr:
      return SecondOpcode == AArch64::AESMCrr ||
             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
    // AES decode.
    case AArch64::AESDrr:
      return SecondOpcode == AArch64::AESIMCrr ||
             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
    }

  if (ST.hasFuseLiterals())
    // Fuse literal generation operations.  Operand 3 of MOVZ/MOVK is checked
    // as the shift selecting which 16-bit chunk of the immediate is written
    // (presumably 16 => bits [31:16], 32/48 => upper chunks — matches the
    // half-of-immediate commentary below).
    switch (FirstOpcode) {
    // PC relative address.
    case AArch64::ADRP:
      return SecondOpcode == AArch64::ADDXri ||
             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
    // 32 bit immediate.
    case AArch64::MOVZWi:
      return (SecondOpcode == AArch64::MOVKWi &&
              Second->getOperand(3).getImm() == 16) ||
             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
    // Lower half of 64 bit immediate.
    case AArch64::MOVZXi:
      return (SecondOpcode == AArch64::MOVKXi &&
              Second->getOperand(3).getImm() == 16) ||
             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
    // Upper half of 64 bit immediate.
    case AArch64::MOVKXi:
      return First->getOperand(3).getImm() == 32 &&
             ((SecondOpcode == AArch64::MOVKXi &&
               Second->getOperand(3).getImm() == 48) ||
              SecondOpcode == AArch64::INSTRUCTION_LIST_END);
    }

  // No subtarget fusion rule matched.
  return false;
}
162
163/// \brief Implement the fusion of instruction pairs in the scheduling
NAKAMURA Takumi468487d2017-02-01 07:30:46 +0000164/// DAG, anchored at the instruction in ASU. Preds
Evandro Menezes94edf022017-02-01 02:54:34 +0000165/// indicates if its dependencies in \param APreds are predecessors instead of
166/// successors.
167static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit *ASU,
168 SmallVectorImpl<SDep> &APreds, bool Preds) {
169 const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(DAG->TII);
170 const AArch64Subtarget &ST = DAG->MF.getSubtarget<AArch64Subtarget>();
171
172 const MachineInstr *AMI = ASU->getInstr();
173 if (!AMI || AMI->isPseudo() || AMI->isTransient() ||
174 (Preds && !shouldScheduleAdjacent(*TII, ST, nullptr, AMI)) ||
175 (!Preds && !shouldScheduleAdjacent(*TII, ST, AMI, nullptr)))
176 return false;
177
178 for (SDep &BDep : APreds) {
179 if (BDep.isWeak())
180 continue;
181
182 SUnit *BSU = BDep.getSUnit();
183 const MachineInstr *BMI = BSU->getInstr();
184 if (!BMI || BMI->isPseudo() || BMI->isTransient() ||
185 (Preds && !shouldScheduleAdjacent(*TII, ST, BMI, AMI)) ||
186 (!Preds && !shouldScheduleAdjacent(*TII, ST, AMI, BMI)))
187 continue;
188
189 // Create a single weak edge between the adjacent instrs. The only
190 // effect is to cause bottom-up scheduling to heavily prioritize the
191 // clustered instrs.
192 if (Preds)
193 DAG->addEdge(ASU, SDep(BSU, SDep::Cluster));
194 else
195 DAG->addEdge(BSU, SDep(ASU, SDep::Cluster));
196
197 // Adjust the latency between the 1st instr and its predecessors/successors.
198 for (SDep &Dep : APreds)
199 if (Dep.getSUnit() == BSU)
200 Dep.setLatency(0);
201
202 // Adjust the latency between the 2nd instr and its successors/predecessors.
203 auto &BSuccs = Preds ? BSU->Succs : BSU->Preds;
204 for (SDep &Dep : BSuccs)
205 if (Dep.getSUnit() == ASU)
206 Dep.setLatency(0);
207
208 DEBUG(dbgs() << "Macro fuse ";
209 Preds ? BSU->print(dbgs(), DAG) : ASU->print(dbgs(), DAG);
210 dbgs() << " - ";
211 Preds ? ASU->print(dbgs(), DAG) : BSU->print(dbgs(), DAG);
212 dbgs() << '\n');
213
214 return true;
215 }
216
217 return false;
218}
219
220/// \brief Post-process the DAG to create cluster edges between instructions
221/// that may be fused by the processor into a single operation.
222class AArch64MacroFusion : public ScheduleDAGMutation {
223public:
224 AArch64MacroFusion() {}
225
226 void apply(ScheduleDAGInstrs *DAGInstrs) override;
227};
228
229void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
230 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
231
232 // For each of the SUnits in the scheduling block, try to fuse the instruction
233 // in it with one in its successors.
234 for (SUnit &ASU : DAG->SUnits)
235 scheduleAdjacentImpl(DAG, &ASU, ASU.Succs, false);
236
237 // Try to fuse the instruction in the ExitSU with one in its predecessors.
238 scheduleAdjacentImpl(DAG, &DAG->ExitSU, DAG->ExitSU.Preds, true);
239}
240
241} // end namespace
242
243
244namespace llvm {
245
246std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation () {
247 return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr;
248}
249
250} // end namespace llvm