//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may occupy. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  // before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    NumBytes = 16;
    break;
  }

  return NumBytes;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}
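// For example, a terminating "tbnz w0, #3, %bb.1" is parsed above into
// Cond = {-1, TBNZW, w0, 3}, where the leading -1 marks a folded
// compare-and-branch; a plain "b.eq %bb.1" yields just the condition-code
// immediate, Cond = {EQ}.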

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}
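// For example, with the default of 14 displacement bits, TB(N)Z can reach a
// signed 14-bit count of 4-byte instructions, i.e. roughly +/-32 KiB from the
// branch; the 19-bit CB(N)Z and Bcc forms reach roughly +/-1 MiB.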

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use add(), not addReg(), to keep the register flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // Fall through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // Fall through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}
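// For example, if TrueReg is defined by "%t = ADDWri %s, 1, 0", the select
// can use CSINCWr with %s as the replacement register, since csinc
// conditionally increments its second operand; ORN and SUB from the zero
// register fold to csinv and csneg in the same way.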

bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       unsigned TrueReg, unsigned FalseReg,
                                       int &CondCycles, int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv, and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}
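// For example, a select on Cond = {-1, TBZW, w0, 3} is materialized as
// "ands wzr, w0, #0x8" (a tst against bit 3) followed by
// "csel dst, trueReg, falseReg, eq".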

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}
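// For example, the replicated bitmask pattern 0x00ff00ff00ff00ff is a valid
// logical immediate, so a MOVi64imm of it can become a single ORRXri from
// xzr; an irregular constant such as 0x123456789 is not encodable this way
// and needs a MOVZ/MOVK sequence instead.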

// FIXME: This implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();
  if (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
      isExynosShiftLeftFast(MI))
    return true;

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub with immediate and no shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has the
  // ZeroCycleZeroing feature.
  case AArch64::FMOVH0:
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
  unsigned Imm, Shift;
  AArch64_AM::ShiftExtendType Ext;

  switch (MI.getOpcode()) {
  default:
    return false;

  // WriteI
  case AArch64::ADDSWri:
  case AArch64::ADDSXri:
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return true;

  // WriteISReg
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getShiftValue(Imm);
    Ext = AArch64_AM::getShiftType(Imm);
    return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));

  // WriteIEReg
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getArithShiftValue(Imm);
    Ext = AArch64_AM::getArithExtendType(Imm);
    return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));

  case AArch64::PRFMroW:
  case AArch64::PRFMroX:

  // WriteLDIdx
  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:

  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:

  // WriteSTIdx
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX:

  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
    Imm = MI.getOperand(3).getImm();
    Ext = AArch64_AM::getMemExtendType(Imm);
    return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
  }
}
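// For example, on Exynos M1 "add x0, x1, x2, lsl #3" is treated as fast
// (and hence as cheap as a move above), while "add x0, x1, x2, lsl #4" is
// not, since only a shift of zero or a left shift of at most 3 qualifies.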

bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case; these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}
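// For example, "sxtw x0, w1" is encoded as "SBFMXri x0, x1, 0, 31", so it
// behaves like a copy of w1 into x0's low half (the sub_32 subregister) and
// can be coalesced away by the register coalescer.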

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}
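// For example, "ldr x0, [x2]" (offset 0, width 8) and "str x1, [x2, #8]"
// (offset 8) share a base register and satisfy 0 + 8 <= 8, so the two
// accesses are provably disjoint and safe to reorder.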

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: CmpValue is normalized to 0 or 1 here.
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in optimizeCompareInstr.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}
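// For example, a SUBSWri whose NZCV def is dead can be rewritten as SUBWri,
// but a flag-setting form whose destination is wzr/xzr is kept: in the
// non-flag-setting encoding that register number would be interpreted as the
// stack pointer.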

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed the condition
/// flags are accessed on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting at \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be truly a compare
/// instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction. Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have an "ri" form whose immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a compare instruction if its destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}
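// For example, "%1 = SUBSWri %0, 0, 0" whose result %1 is otherwise unused is
// a pure compare against zero; substituteCmpToZero() may then remove it when
// an earlier instruction defining %0 (or its S form) already produces the
// NZCV flags the compare's users need.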

/// Get the opcode of the S (flag-setting) version of Instr.
/// If Instr is already an S version, its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

1354/// Find a condition code used by the instruction.
1355/// Returns AArch64CC::Invalid if either the instruction does not use condition
1356/// codes or we don't optimize CmpInstr in the presence of such instructions.
1357static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1358 switch (Instr.getOpcode()) {
Jessica Paquette809d7082017-07-28 03:21:58 +00001359 default:
1360 return AArch64CC::Invalid;
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001361
Jessica Paquette809d7082017-07-28 03:21:58 +00001362 case AArch64::Bcc: {
1363 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1364 assert(Idx >= 2);
1365 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1366 }
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001367
Jessica Paquette809d7082017-07-28 03:21:58 +00001368 case AArch64::CSINVWr:
1369 case AArch64::CSINVXr:
1370 case AArch64::CSINCWr:
1371 case AArch64::CSINCXr:
1372 case AArch64::CSELWr:
1373 case AArch64::CSELXr:
1374 case AArch64::CSNEGWr:
1375 case AArch64::CSNEGXr:
1376 case AArch64::FCSELSrrr:
1377 case AArch64::FCSELDrrr: {
1378 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1379 assert(Idx >= 1);
1380 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1381 }
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001382 }
1383}
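
// Operand-layout sketch motivating the Idx arithmetic above (illustrative):
// for Bcc the operands are (cond, target, implicit NZCV), so
// findRegisterUseOperandIdx(NZCV) returns 2 and the condition lives at
// Idx - 2 == 0; for CSELWr they are (dst, src1, src2, cond, implicit NZCV),
// so the condition lives at Idx - 1 == 3.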
1384
1385static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1386 assert(CC != AArch64CC::Invalid);
1387 UsedNZCV UsedFlags;
1388 switch (CC) {
Jessica Paquette809d7082017-07-28 03:21:58 +00001389 default:
1390 break;
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001391
Jessica Paquette809d7082017-07-28 03:21:58 +00001392 case AArch64CC::EQ: // Z set
1393 case AArch64CC::NE: // Z clear
1394 UsedFlags.Z = true;
1395 break;
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001396
Jessica Paquette809d7082017-07-28 03:21:58 +00001397 case AArch64CC::HI: // Z clear and C set
1398 case AArch64CC::LS: // Z set or C clear
1399 UsedFlags.Z = true;
1400 LLVM_FALLTHROUGH;
1401 case AArch64CC::HS: // C set
1402 case AArch64CC::LO: // C clear
1403 UsedFlags.C = true;
1404 break;
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001405
Jessica Paquette809d7082017-07-28 03:21:58 +00001406 case AArch64CC::MI: // N set
1407 case AArch64CC::PL: // N clear
1408 UsedFlags.N = true;
1409 break;
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001410
Jessica Paquette809d7082017-07-28 03:21:58 +00001411 case AArch64CC::VS: // V set
1412 case AArch64CC::VC: // V clear
1413 UsedFlags.V = true;
1414 break;
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001415
Jessica Paquette809d7082017-07-28 03:21:58 +00001416 case AArch64CC::GT: // Z clear, N and V the same
1417 case AArch64CC::LE: // Z set, N and V differ
1418 UsedFlags.Z = true;
1419 LLVM_FALLTHROUGH;
1420 case AArch64CC::GE: // N and V the same
1421 case AArch64CC::LT: // N and V differ
1422 UsedFlags.N = true;
1423 UsedFlags.V = true;
1424 break;
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001425 }
1426 return UsedFlags;
1427}
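
// Example (illustrative): for CC == AArch64CC::HI ("Z clear and C set") the
// returned mask has Z and C set and N and V clear, so a user predicated on
// HI counts as a use of both the Z and C flags.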
1428
1429static bool isADDSRegImm(unsigned Opcode) {
1430 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1431}
1432
1433static bool isSUBSRegImm(unsigned Opcode) {
1434 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1435}
1436
1437/// Check if CmpInstr can be substituted by MI.
1438///
1439/// CmpInstr can be substituted when all of the following hold:
1440/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0',
1441/// - and MI and CmpInstr are in the same MachineBasicBlock,
1442/// - and the condition flags are not alive in successors of CmpInstr's parent,
1443/// - and, if MI's opcode is the S form, there are no defs of the flags
1444/// between MI and CmpInstr,
1445/// or, if MI's opcode is not the S form, there are neither defs nor uses
1446/// of the flags between MI and CmpInstr,
1447/// - and the C and V flags are not used after CmpInstr.
1448static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
Jessica Paquette809d7082017-07-28 03:21:58 +00001449 const TargetRegisterInfo *TRI) {
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001450 assert(MI);
1451 assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1452 assert(CmpInstr);
1453
1454 const unsigned CmpOpcode = CmpInstr->getOpcode();
1455 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1456 return false;
1457
1458 if (MI->getParent() != CmpInstr->getParent())
1459 return false;
1460
1461 if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1462 return false;
1463
1464 AccessKind AccessToCheck = AK_Write;
1465 if (sForm(*MI) != MI->getOpcode())
1466 AccessToCheck = AK_All;
1467 if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1468 return false;
1469
1470 UsedNZCV NZCVUsedAfterCmp;
Jessica Paquette809d7082017-07-28 03:21:58 +00001471 for (auto I = std::next(CmpInstr->getIterator()),
1472 E = CmpInstr->getParent()->instr_end();
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001473 I != E; ++I) {
1474 const MachineInstr &Instr = *I;
1475 if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1476 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1477 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1478 return false;
1479 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1480 }
1481
1482 if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1483 break;
1484 }
Jessica Paquette809d7082017-07-28 03:21:58 +00001485
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001486 return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1487}
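
// A minimal sketch of the pattern this check accepts (register numbers are
// assumptions for illustration):
//   adds w8, w9, w10   ; MI, already the S form, defines NZCV
//   ...                ; no intervening defs of NZCV
//   subs wzr, w8, #0   ; CmpInstr, i.e. 'cmp w8, #0'
//   b.eq target        ; only N/Z consumed afterwards
// The final test rejects C/V consumers because ADDS/SUBS set carry and
// overflow for their own operands, which need not match a compare with zero.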
1488
1489/// Substitute an instruction comparing to zero with another instruction
1490/// which produces the needed condition flags.
1491///
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001492/// Return true on success.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001493bool AArch64InstrInfo::substituteCmpToZero(
1494 MachineInstr &CmpInstr, unsigned SrcReg,
1495 const MachineRegisterInfo *MRI) const {
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001496 assert(MRI);
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001497 // Get the unique definition of SrcReg.
1498 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1499 if (!MI)
1500 return false;
1501
1502 const TargetRegisterInfo *TRI = &getRegisterInfo();
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001503
1504 unsigned NewOpc = sForm(*MI);
1505 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1506 return false;
Tim Northover3b0846e2014-05-24 12:50:23 +00001507
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001508 if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001509 return false;
Tim Northover3b0846e2014-05-24 12:50:23 +00001510
1511 // Update the instruction to set NZCV.
1512 MI->setDesc(get(NewOpc));
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001513 CmpInstr.eraseFromParent();
1514 bool succeeded = UpdateOperandRegClass(*MI);
Tim Northover3b0846e2014-05-24 12:50:23 +00001515 (void)succeeded;
1516 assert(succeeded && "Some operands' register classes are incompatible!");
1517 MI->addRegisterDefined(AArch64::NZCV, TRI);
1518 return true;
1519}
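
// Before/after sketch of the rewrite (illustrative, MIR-style):
//   before: %w8 = ADDWrr %w9, %w10 ; SUBSWri %w8, 0 (dead def) ; b.eq ...
//   after:  %w8 = ADDSWrr %w9, %w10 (implicit-def NZCV) ; b.eq ...
// The unique def of SrcReg is flipped to its S form so it sets N and Z
// itself, and the now-redundant compare against zero is erased.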
1520
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001521bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1522 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001523 return false;
1524
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001525 MachineBasicBlock &MBB = *MI.getParent();
1526 DebugLoc DL = MI.getDebugLoc();
1527 unsigned Reg = MI.getOperand(0).getReg();
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001528 const GlobalValue *GV =
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001529 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001530 const TargetMachine &TM = MBB.getParent()->getTarget();
1531 unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1532 const unsigned char MO_NC = AArch64II::MO_NC;
1533
1534 if ((OpFlags & AArch64II::MO_GOT) != 0) {
1535 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1536 .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
1537 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001538 .addReg(Reg, RegState::Kill)
1539 .addImm(0)
1540 .addMemOperand(*MI.memoperands_begin());
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001541 } else if (TM.getCodeModel() == CodeModel::Large) {
1542 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
Jessica Paquette809d7082017-07-28 03:21:58 +00001543 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1544 .addImm(0);
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001545 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1546 .addReg(Reg, RegState::Kill)
Jessica Paquette809d7082017-07-28 03:21:58 +00001547 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1548 .addImm(16);
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001549 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1550 .addReg(Reg, RegState::Kill)
Jessica Paquette809d7082017-07-28 03:21:58 +00001551 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1552 .addImm(32);
Evandro Menezes7960b2e2017-01-18 18:57:08 +00001553 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1554 .addReg(Reg, RegState::Kill)
Jessica Paquette809d7082017-07-28 03:21:58 +00001555 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1556 .addImm(48);
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001557 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001558 .addReg(Reg, RegState::Kill)
1559 .addImm(0)
1560 .addMemOperand(*MI.memoperands_begin());
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001561 } else {
1562 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1563 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1564 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1565 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1566 .addReg(Reg, RegState::Kill)
1567 .addGlobalAddress(GV, 0, LoFlags)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001568 .addMemOperand(*MI.memoperands_begin());
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001569 }
1570
1571 MBB.erase(MI);
1572
1573 return true;
1574}
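
// Expansion sketch (illustrative; the guard symbol name is an assumption --
// the real GlobalValue is taken from the pseudo's memory operand):
//   GOT:         LOADgot xN, :got:__stack_chk_guard  // pseudo: adrp + ldr
//                ldr  xN, [xN]
//   large model: movz xN, #:abs_g0_nc:__stack_chk_guard
//                movk xN, #:abs_g1_nc:... , #:abs_g2_nc:... , #:abs_g3:...
//                ldr  xN, [xN]
//   otherwise:   adrp xN, __stack_chk_guard
//                ldr  xN, [xN, :lo12:__stack_chk_guard]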
1575
Tim Northover3b0846e2014-05-24 12:50:23 +00001576/// Return true if this instruction has a shifted-register operand with a non-zero shift immediate.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001577bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
1578 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001579 default:
1580 break;
1581 case AArch64::ADDSWrs:
1582 case AArch64::ADDSXrs:
1583 case AArch64::ADDWrs:
1584 case AArch64::ADDXrs:
1585 case AArch64::ANDSWrs:
1586 case AArch64::ANDSXrs:
1587 case AArch64::ANDWrs:
1588 case AArch64::ANDXrs:
1589 case AArch64::BICSWrs:
1590 case AArch64::BICSXrs:
1591 case AArch64::BICWrs:
1592 case AArch64::BICXrs:
Tim Northover3b0846e2014-05-24 12:50:23 +00001593 case AArch64::EONWrs:
1594 case AArch64::EONXrs:
1595 case AArch64::EORWrs:
1596 case AArch64::EORXrs:
1597 case AArch64::ORNWrs:
1598 case AArch64::ORNXrs:
1599 case AArch64::ORRWrs:
1600 case AArch64::ORRXrs:
1601 case AArch64::SUBSWrs:
1602 case AArch64::SUBSXrs:
1603 case AArch64::SUBWrs:
1604 case AArch64::SUBXrs:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001605 if (MI.getOperand(3).isImm()) {
1606 unsigned val = MI.getOperand(3).getImm();
Tim Northover3b0846e2014-05-24 12:50:23 +00001607 return (val != 0);
1608 }
1609 break;
1610 }
1611 return false;
1612}
1613
1614/// Return true if this instruction has an extended-register operand with a non-zero immediate.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001615bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
1616 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001617 default:
1618 break;
1619 case AArch64::ADDSWrx:
1620 case AArch64::ADDSXrx:
1621 case AArch64::ADDSXrx64:
1622 case AArch64::ADDWrx:
1623 case AArch64::ADDXrx:
1624 case AArch64::ADDXrx64:
1625 case AArch64::SUBSWrx:
1626 case AArch64::SUBSXrx:
1627 case AArch64::SUBSXrx64:
1628 case AArch64::SUBWrx:
1629 case AArch64::SUBXrx:
1630 case AArch64::SUBXrx64:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001631 if (MI.getOperand(3).isImm()) {
1632 unsigned val = MI.getOperand(3).getImm();
Tim Northover3b0846e2014-05-24 12:50:23 +00001633 return (val != 0);
1634 }
1635 break;
1636 }
1637
1638 return false;
1639}
1640
1641// Return true if this instruction simply sets its single destination register
1642// to zero. This is equivalent to a register rename of the zero-register.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001643bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
1644 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001645 default:
1646 break;
1647 case AArch64::MOVZWi:
1648 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001649 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1650 assert(MI.getDesc().getNumOperands() == 3 &&
1651 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
Tim Northover3b0846e2014-05-24 12:50:23 +00001652 return true;
1653 }
1654 break;
1655 case AArch64::ANDWri: // and Rd, Rzr, #imm
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001656 return MI.getOperand(1).getReg() == AArch64::WZR;
Tim Northover3b0846e2014-05-24 12:50:23 +00001657 case AArch64::ANDXri:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001658 return MI.getOperand(1).getReg() == AArch64::XZR;
Tim Northover3b0846e2014-05-24 12:50:23 +00001659 case TargetOpcode::COPY:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001660 return MI.getOperand(1).getReg() == AArch64::WZR;
Tim Northover3b0846e2014-05-24 12:50:23 +00001661 }
1662 return false;
1663}
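
// Examples this recognizes (illustrative):
//   movz w0, #0          ; MOVZWi, zero immediate, zero shift
//   and  x0, xzr, #0xff  ; ANDXri reading only the zero register
//   $w0 = COPY $wzr      ; plain copy out of WZR
// Each writes zero to its destination, so callers may treat it as a rename
// of the zero register.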
1664
1665// Return true if this instruction simply renames a general register without
1666// modifying bits.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001667bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
1668 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001669 default:
1670 break;
1671 case TargetOpcode::COPY: {
1672 // GPR32 copies will be lowered to ORRXrs
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001673 unsigned DstReg = MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001674 return (AArch64::GPR32RegClass.contains(DstReg) ||
1675 AArch64::GPR64RegClass.contains(DstReg));
1676 }
1677 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001678 if (MI.getOperand(1).getReg() == AArch64::XZR) {
1679 assert(MI.getDesc().getNumOperands() == 4 &&
1680 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
Tim Northover3b0846e2014-05-24 12:50:23 +00001681 return true;
1682 }
Renato Golin541d7e72014-08-01 17:27:31 +00001683 break;
Tim Northover3b0846e2014-05-24 12:50:23 +00001684 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001685 if (MI.getOperand(2).getImm() == 0) {
1686 assert(MI.getDesc().getNumOperands() == 4 &&
1687 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
Tim Northover3b0846e2014-05-24 12:50:23 +00001688 return true;
1689 }
Renato Golin541d7e72014-08-01 17:27:31 +00001690 break;
Tim Northover3b0846e2014-05-24 12:50:23 +00001691 }
1692 return false;
1693}
1694
1695// Return true if this instruction simply renames a floating-point register
1696// without modifying bits.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001697bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
1698 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001699 default:
1700 break;
1701 case TargetOpcode::COPY: {
1702 // FPR64 copies will be lowered to ORR.16b
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001703 unsigned DstReg = MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001704 return (AArch64::FPR64RegClass.contains(DstReg) ||
1705 AArch64::FPR128RegClass.contains(DstReg));
1706 }
1707 case AArch64::ORRv16i8:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001708 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1709 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00001710 "invalid ORRv16i8 operands");
1711 return true;
1712 }
Renato Golin541d7e72014-08-01 17:27:31 +00001713 break;
Tim Northover3b0846e2014-05-24 12:50:23 +00001714 }
1715 return false;
1716}
1717
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001718unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
Tim Northover3b0846e2014-05-24 12:50:23 +00001719 int &FrameIndex) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001720 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001721 default:
1722 break;
1723 case AArch64::LDRWui:
1724 case AArch64::LDRXui:
1725 case AArch64::LDRBui:
1726 case AArch64::LDRHui:
1727 case AArch64::LDRSui:
1728 case AArch64::LDRDui:
1729 case AArch64::LDRQui:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001730 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1731 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1732 FrameIndex = MI.getOperand(1).getIndex();
1733 return MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001734 }
1735 break;
1736 }
1737
1738 return 0;
1739}
1740
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001741unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
Tim Northover3b0846e2014-05-24 12:50:23 +00001742 int &FrameIndex) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001743 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001744 default:
1745 break;
1746 case AArch64::STRWui:
1747 case AArch64::STRXui:
1748 case AArch64::STRBui:
1749 case AArch64::STRHui:
1750 case AArch64::STRSui:
1751 case AArch64::STRDui:
1752 case AArch64::STRQui:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001753 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1754 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1755 FrameIndex = MI.getOperand(1).getIndex();
1756 return MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001757 }
1758 break;
1759 }
1760 return 0;
1761}
1762
1763/// Return true if this load/store scales or extends its register offset.
1764/// This refers to scaling a dynamic index as opposed to scaled immediates.
1765/// MI should be a memory op that allows scaled addressing.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001766bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
1767 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001768 default:
1769 break;
1770 case AArch64::LDRBBroW:
1771 case AArch64::LDRBroW:
1772 case AArch64::LDRDroW:
1773 case AArch64::LDRHHroW:
1774 case AArch64::LDRHroW:
1775 case AArch64::LDRQroW:
1776 case AArch64::LDRSBWroW:
1777 case AArch64::LDRSBXroW:
1778 case AArch64::LDRSHWroW:
1779 case AArch64::LDRSHXroW:
1780 case AArch64::LDRSWroW:
1781 case AArch64::LDRSroW:
1782 case AArch64::LDRWroW:
1783 case AArch64::LDRXroW:
1784 case AArch64::STRBBroW:
1785 case AArch64::STRBroW:
1786 case AArch64::STRDroW:
1787 case AArch64::STRHHroW:
1788 case AArch64::STRHroW:
1789 case AArch64::STRQroW:
1790 case AArch64::STRSroW:
1791 case AArch64::STRWroW:
1792 case AArch64::STRXroW:
1793 case AArch64::LDRBBroX:
1794 case AArch64::LDRBroX:
1795 case AArch64::LDRDroX:
1796 case AArch64::LDRHHroX:
1797 case AArch64::LDRHroX:
1798 case AArch64::LDRQroX:
1799 case AArch64::LDRSBWroX:
1800 case AArch64::LDRSBXroX:
1801 case AArch64::LDRSHWroX:
1802 case AArch64::LDRSHXroX:
1803 case AArch64::LDRSWroX:
1804 case AArch64::LDRSroX:
1805 case AArch64::LDRWroX:
1806 case AArch64::LDRXroX:
1807 case AArch64::STRBBroX:
1808 case AArch64::STRBroX:
1809 case AArch64::STRDroX:
1810 case AArch64::STRHHroX:
1811 case AArch64::STRHroX:
1812 case AArch64::STRQroX:
1813 case AArch64::STRSroX:
1814 case AArch64::STRWroX:
1815 case AArch64::STRXroX:
1816
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001817 unsigned Val = MI.getOperand(3).getImm();
Tim Northover3b0846e2014-05-24 12:50:23 +00001818 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1819 return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1820 }
1821 return false;
1822}
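
// For example (illustrative): 'ldr x0, [x1, x2, lsl #3]' scales its register
// offset and 'ldr w0, [x1, w2, sxtw]' extends it, so both return true, while
// 'ldr x0, [x1, x2]' is a plain UXTX offset with no shift and returns false.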
1823
1824/// Check all MachineMemOperands for a hint to suppress pairing.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001825bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
Eugene Zelenko049b0172017-01-06 00:30:53 +00001826 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
Justin Lebar288b3372016-07-14 18:15:20 +00001827 return MMO->getFlags() & MOSuppressPair;
1828 });
Tim Northover3b0846e2014-05-24 12:50:23 +00001829}
1830
1831/// Set a flag on the first MachineMemOperand to suppress pairing.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001832void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
1833 if (MI.memoperands_empty())
Tim Northover3b0846e2014-05-24 12:50:23 +00001834 return;
Justin Lebar288b3372016-07-14 18:15:20 +00001835 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
Tim Northover3b0846e2014-05-24 12:50:23 +00001836}
1837
Geoff Berryb1e87142017-07-14 21:44:12 +00001838/// Check all MachineMemOperands for a hint that the load/store is strided.
1839bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) const {
1840 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1841 return MMO->getFlags() & MOStridedAccess;
1842 });
1843}
1844
Chad Rosiere4e15ba2016-03-09 17:29:48 +00001845bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
1846 switch (Opc) {
1847 default:
1848 return false;
1849 case AArch64::STURSi:
1850 case AArch64::STURDi:
1851 case AArch64::STURQi:
1852 case AArch64::STURBBi:
1853 case AArch64::STURHHi:
1854 case AArch64::STURWi:
1855 case AArch64::STURXi:
1856 case AArch64::LDURSi:
1857 case AArch64::LDURDi:
1858 case AArch64::LDURQi:
1859 case AArch64::LDURWi:
1860 case AArch64::LDURXi:
1861 case AArch64::LDURSWi:
1862 case AArch64::LDURHHi:
1863 case AArch64::LDURBBi:
1864 case AArch64::LDURSBWi:
1865 case AArch64::LDURSHWi:
1866 return true;
1867 }
1868}
1869
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001870bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
1871 return isUnscaledLdSt(MI.getOpcode());
Chad Rosiere4e15ba2016-03-09 17:29:48 +00001872}
1873
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001874// Is this a candidate for ld/st merging or pairing? For example, we don't
1875// touch volatiles or load/stores that have a hint to avoid pair formation.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001876bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001877 // If this is a volatile load/store, don't mess with it.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001878 if (MI.hasOrderedMemoryRef())
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001879 return false;
1880
1881 // Make sure this is a reg+imm (as opposed to an address reloc).
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001882 assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
1883 if (!MI.getOperand(2).isImm())
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001884 return false;
1885
1886 // Can't merge/pair if the instruction modifies the base register.
1887 // e.g., ldr x0, [x0]
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001888 unsigned BaseReg = MI.getOperand(1).getReg();
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001889 const TargetRegisterInfo *TRI = &getRegisterInfo();
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001890 if (MI.modifiesRegister(BaseReg, TRI))
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001891 return false;
1892
1893 // Check if this load/store has a hint to avoid pair formation.
1894 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1895 if (isLdStPairSuppressed(MI))
1896 return false;
1897
Matthias Braun651cff42016-06-02 18:03:53 +00001898 // On some CPUs quad load/store pairs are slower than two single load/stores.
Evandro Menezes7784cac2017-01-24 17:34:31 +00001899 if (Subtarget.isPaired128Slow()) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001900 switch (MI.getOpcode()) {
Matthias Braunbcfd2362016-05-28 01:06:51 +00001901 default:
1902 break;
Matthias Braunbcfd2362016-05-28 01:06:51 +00001903 case AArch64::LDURQi:
1904 case AArch64::STURQi:
1905 case AArch64::LDRQui:
1906 case AArch64::STRQui:
1907 return false;
Evandro Menezes8d53f882016-04-13 18:31:45 +00001908 }
Matthias Braunbcfd2362016-05-28 01:06:51 +00001909 }
Evandro Menezes8d53f882016-04-13 18:31:45 +00001910
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001911 return true;
1912}
1913
Chad Rosierc27a18f2016-03-09 16:00:35 +00001914bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001915 MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
Chad Rosierc27a18f2016-03-09 16:00:35 +00001916 const TargetRegisterInfo *TRI) const {
Geoff Berry22dfbc52016-08-12 15:26:00 +00001917 unsigned Width;
1918 return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
Tim Northover3b0846e2014-05-24 12:50:23 +00001919}
1920
Sanjoy Dasb666ea32015-06-15 18:44:14 +00001921bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001922 MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
Chad Rosier3528c1e2014-09-08 14:43:48 +00001923 const TargetRegisterInfo *TRI) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001924 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
Chad Rosier3528c1e2014-09-08 14:43:48 +00001925 // Handle only loads/stores with base register followed by immediate offset.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001926 if (LdSt.getNumExplicitOperands() == 3) {
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001927 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001928 if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001929 return false;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001930 } else if (LdSt.getNumExplicitOperands() == 4) {
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001931 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001932 if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
1933 !LdSt.getOperand(3).isImm())
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001934 return false;
1935 } else
Chad Rosier3528c1e2014-09-08 14:43:48 +00001936 return false;
1937
Jessica Paquette809d7082017-07-28 03:21:58 +00001938 // Get the scaling factor for the instruction and set the width of the
Jessica Paquetteea8cc092017-03-17 22:26:55 +00001939 // memory access.
Chad Rosier0da267d2016-03-09 16:46:48 +00001940 unsigned Scale = 0;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00001941 int64_t Dummy1, Dummy2;
1942
1943 // If this returns false, then it's an instruction we don't want to handle.
1944 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
Chad Rosier3528c1e2014-09-08 14:43:48 +00001945 return false;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00001946
1947 // Compute the offset. Offset is calculated as the immediate operand
1948 // multiplied by the scaling factor. Unscaled instructions have scaling factor
1949 // set to 1.
1950 if (LdSt.getNumExplicitOperands() == 3) {
1951 BaseReg = LdSt.getOperand(1).getReg();
1952 Offset = LdSt.getOperand(2).getImm() * Scale;
1953 } else {
1954 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
1955 BaseReg = LdSt.getOperand(2).getReg();
1956 Offset = LdSt.getOperand(3).getImm() * Scale;
1957 }
1958 return true;
1959}
1960
Jessica Paquette809d7082017-07-28 03:21:58 +00001961MachineOperand &
Jessica Paquetteea8cc092017-03-17 22:26:55 +00001962AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
1963 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
Jessica Paquette809d7082017-07-28 03:21:58 +00001964 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
Jessica Paquetteea8cc092017-03-17 22:26:55 +00001965 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
1966 return OfsOp;
1967}
1968
1969bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
1970 unsigned &Width, int64_t &MinOffset,
1971 int64_t &MaxOffset) const {
1972 switch (Opcode) {
Jessica Paquette809d7082017-07-28 03:21:58 +00001973 // Not a memory operation or something we want to handle.
Jessica Paquetteea8cc092017-03-17 22:26:55 +00001974 default:
1975 Scale = Width = 0;
1976 MinOffset = MaxOffset = 0;
1977 return false;
1978 case AArch64::STRWpost:
1979 case AArch64::LDRWpost:
1980 Width = 32;
1981 Scale = 4;
1982 MinOffset = -256;
1983 MaxOffset = 255;
1984 break;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001985 case AArch64::LDURQi:
1986 case AArch64::STURQi:
1987 Width = 16;
1988 Scale = 1;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00001989 MinOffset = -256;
1990 MaxOffset = 255;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001991 break;
1992 case AArch64::LDURXi:
1993 case AArch64::LDURDi:
1994 case AArch64::STURXi:
1995 case AArch64::STURDi:
1996 Width = 8;
1997 Scale = 1;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00001998 MinOffset = -256;
1999 MaxOffset = 255;
Chad Rosier3528c1e2014-09-08 14:43:48 +00002000 break;
2001 case AArch64::LDURWi:
2002 case AArch64::LDURSi:
2003 case AArch64::LDURSWi:
2004 case AArch64::STURWi:
2005 case AArch64::STURSi:
2006 Width = 4;
2007 Scale = 1;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00002008 MinOffset = -256;
2009 MaxOffset = 255;
Chad Rosier3528c1e2014-09-08 14:43:48 +00002010 break;
2011 case AArch64::LDURHi:
2012 case AArch64::LDURHHi:
2013 case AArch64::LDURSHXi:
2014 case AArch64::LDURSHWi:
2015 case AArch64::STURHi:
2016 case AArch64::STURHHi:
2017 Width = 2;
2018 Scale = 1;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00002019 MinOffset = -256;
2020 MaxOffset = 255;
Chad Rosier3528c1e2014-09-08 14:43:48 +00002021 break;
2022 case AArch64::LDURBi:
2023 case AArch64::LDURBBi:
2024 case AArch64::LDURSBXi:
2025 case AArch64::LDURSBWi:
2026 case AArch64::STURBi:
2027 case AArch64::STURBBi:
2028 Width = 1;
2029 Scale = 1;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00002030 MinOffset = -256;
2031 MaxOffset = 255;
Chad Rosier3528c1e2014-09-08 14:43:48 +00002032 break;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00002033 case AArch64::LDPQi:
2034 case AArch64::LDNPQi:
2035 case AArch64::STPQi:
2036 case AArch64::STNPQi:
2037 Scale = 16;
2038 Width = 32;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00002039 MinOffset = -64;
2040 MaxOffset = 63;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00002041 break;
Chad Rosierd90e2eb2015-09-18 14:15:19 +00002042 case AArch64::LDRQui:
2043 case AArch64::STRQui:
2044 Scale = Width = 16;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00002045 MinOffset = 0;
2046 MaxOffset = 4095;
Chad Rosierd90e2eb2015-09-18 14:15:19 +00002047 break;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00002048 case AArch64::LDPXi:
2049 case AArch64::LDPDi:
2050 case AArch64::LDNPXi:
2051 case AArch64::LDNPDi:
2052 case AArch64::STPXi:
2053 case AArch64::STPDi:
2054 case AArch64::STNPXi:
2055 case AArch64::STNPDi:
2056 Scale = 8;
2057 Width = 16;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00002058 MinOffset = -64;
2059 MaxOffset = 63;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00002060 break;
Chad Rosier3528c1e2014-09-08 14:43:48 +00002061 case AArch64::LDRXui:
Chad Rosier84a0afd2015-09-18 14:13:18 +00002062 case AArch64::LDRDui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00002063 case AArch64::STRXui:
Chad Rosier84a0afd2015-09-18 14:13:18 +00002064 case AArch64::STRDui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00002065 Scale = Width = 8;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00002066 MinOffset = 0;
2067 MaxOffset = 4095;
Chad Rosier3528c1e2014-09-08 14:43:48 +00002068 break;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00002069 case AArch64::LDPWi:
2070 case AArch64::LDPSi:
2071 case AArch64::LDNPWi:
2072 case AArch64::LDNPSi:
2073 case AArch64::STPWi:
2074 case AArch64::STPSi:
2075 case AArch64::STNPWi:
2076 case AArch64::STNPSi:
2077 Scale = 4;
2078 Width = 8;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00002079 MinOffset = -64;
2080 MaxOffset = 63;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00002081 break;
Chad Rosier3528c1e2014-09-08 14:43:48 +00002082 case AArch64::LDRWui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00002083 case AArch64::LDRSui:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002084 case AArch64::LDRSWui:
Chad Rosier84a0afd2015-09-18 14:13:18 +00002085 case AArch64::STRWui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00002086 case AArch64::STRSui:
2087 Scale = Width = 4;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00002088 MinOffset = 0;
2089 MaxOffset = 4095;
Chad Rosier3528c1e2014-09-08 14:43:48 +00002090 break;
Chad Rosier84a0afd2015-09-18 14:13:18 +00002091 case AArch64::LDRHui:
2092 case AArch64::LDRHHui:
2093 case AArch64::STRHui:
2094 case AArch64::STRHHui:
2095 Scale = Width = 2;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00002096 MinOffset = 0;
2097 MaxOffset = 4095;
Chad Rosier3528c1e2014-09-08 14:43:48 +00002098 break;
Chad Rosierd90e2eb2015-09-18 14:15:19 +00002099 case AArch64::LDRBui:
2100 case AArch64::LDRBBui:
2101 case AArch64::STRBui:
2102 case AArch64::STRBBui:
2103 Scale = Width = 1;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00002104 MinOffset = 0;
2105 MaxOffset = 4095;
Chad Rosier3528c1e2014-09-08 14:43:48 +00002106 break;
Chad Rosier064261d2016-02-01 20:54:36 +00002107 }
Chad Rosier3528c1e2014-09-08 14:43:48 +00002108
Chad Rosier3528c1e2014-09-08 14:43:48 +00002109 return true;
2110}
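
// Usage sketch (illustrative only):
//   unsigned Scale, Width;
//   int64_t MinOff, MaxOff;
//   if (TII->getMemOpInfo(AArch64::LDRXui, Scale, Width, MinOff, MaxOff)) {
//     // Scale == 8, Width == 8, MinOff == 0, MaxOff == 4095, so
//     // 'ldr x0, [x1, #imm]' reaches byte offsets 8 * [0, 4095].
//   }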
2111
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002112// Scale the unscaled offsets. Returns false if the unscaled offset can't be
2113// scaled.
2114static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2115 unsigned OffsetStride = 1;
2116 switch (Opc) {
2117 default:
2118 return false;
2119 case AArch64::LDURQi:
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00002120 case AArch64::STURQi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002121 OffsetStride = 16;
2122 break;
2123 case AArch64::LDURXi:
2124 case AArch64::LDURDi:
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00002125 case AArch64::STURXi:
2126 case AArch64::STURDi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002127 OffsetStride = 8;
2128 break;
2129 case AArch64::LDURWi:
2130 case AArch64::LDURSi:
2131 case AArch64::LDURSWi:
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00002132 case AArch64::STURWi:
2133 case AArch64::STURSi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002134 OffsetStride = 4;
2135 break;
2136 }
2137 // If the byte-offset isn't a multiple of the stride, we can't scale this
2138 // offset.
2139 if (Offset % OffsetStride != 0)
2140 return false;
2141
2142 // Convert the byte-offset used by unscaled into an "element" offset used
2143 // by the scaled pair load/store instructions.
2144 Offset /= OffsetStride;
2145 return true;
2146}
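
// For example (illustrative): LDURXi with byte offset 16 scales to element
// offset 2, which an LDP can encode; byte offset 12 is not a multiple of the
// 8-byte stride, so scaleOffset returns false and no pair is formed.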
2147
2148static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2149 if (FirstOpc == SecondOpc)
2150 return true;
2151 // We can also pair sign-ext and zero-ext instructions.
2152 switch (FirstOpc) {
2153 default:
2154 return false;
2155 case AArch64::LDRWui:
2156 case AArch64::LDURWi:
2157 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2158 case AArch64::LDRSWui:
2159 case AArch64::LDURSWi:
2160 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2161 }
2162 // These instructions can't be paired based on their opcodes.
2163 return false;
2164}
2165
Tim Northover3b0846e2014-05-24 12:50:23 +00002166/// Detect opportunities for ldp/stp formation.
2167///
Sanjoy Dasb666ea32015-06-15 18:44:14 +00002168/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002169bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
Stanislav Mekhanoshin7fe9a5d2017-09-13 22:20:47 +00002170 unsigned BaseReg1,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002171 MachineInstr &SecondLdSt,
Stanislav Mekhanoshin7fe9a5d2017-09-13 22:20:47 +00002172 unsigned BaseReg2,
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00002173 unsigned NumLoads) const {
Stanislav Mekhanoshin7fe9a5d2017-09-13 22:20:47 +00002174 if (BaseReg1 != BaseReg2)
2175 return false;
2176
Tim Northover3b0846e2014-05-24 12:50:23 +00002177 // Only cluster up to a single pair.
2178 if (NumLoads > 1)
2179 return false;
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002180
Geoff Berry22dfbc52016-08-12 15:26:00 +00002181 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2182 return false;
2183
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002184 // Can we pair these instructions based on their opcodes?
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002185 unsigned FirstOpc = FirstLdSt.getOpcode();
2186 unsigned SecondOpc = SecondLdSt.getOpcode();
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002187 if (!canPairLdStOpc(FirstOpc, SecondOpc))
Tim Northover3b0846e2014-05-24 12:50:23 +00002188 return false;
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002189
2190 // Can't merge volatiles or load/stores that have a hint to avoid pair
2191 // formation, for example.
2192 if (!isCandidateToMergeOrPair(FirstLdSt) ||
2193 !isCandidateToMergeOrPair(SecondLdSt))
Tim Northover3b0846e2014-05-24 12:50:23 +00002194 return false;
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002195
2196 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002197 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002198 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2199 return false;
2200
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002201 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002202 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2203 return false;
2204
2205 // Pairwise instructions have a 7-bit signed offset field.
2206 if (Offset1 > 63 || Offset1 < -64)
2207 return false;
2208
Tim Northover3b0846e2014-05-24 12:50:23 +00002209 // The caller should already have ordered First/SecondLdSt by offset.
Chad Rosiercdfd7e72016-03-18 19:21:02 +00002210 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2211 return Offset1 + 1 == Offset2;
Tim Northover3b0846e2014-05-24 12:50:23 +00002212}
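
// Clustering sketch (illustrative):
//   ldr x1, [x10, #8]   ; LDRXui, scaled offset 1
//   ldr x2, [x10, #16]  ; LDRXui, scaled offset 2
// The scaled offsets are adjacent (1 + 1 == 2), so the loads are kept
// together and can later be fused into 'ldp x1, x2, [x10, #8]'.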
2213
Tim Northover3b0846e2014-05-24 12:50:23 +00002214static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2215 unsigned Reg, unsigned SubIdx,
2216 unsigned State,
2217 const TargetRegisterInfo *TRI) {
2218 if (!SubIdx)
2219 return MIB.addReg(Reg, State);
2220
2221 if (TargetRegisterInfo::isPhysicalRegister(Reg))
2222 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2223 return MIB.addReg(Reg, State, SubIdx);
2224}
2225
2226static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2227 unsigned NumRegs) {
2228 // We really want the positive remainder mod 32 here; that happens to be
2229 // easily obtainable with a mask.
2230 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2231}
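
// Example (illustrative): copying D0_D1 into D1_D2 gives
// ((1 - 0) & 0x1f) == 1 < 2, so a forward sub-register copy would write d1
// before d1 is read as a source; copyPhysRegTuple below therefore walks the
// sub-registers in reverse (d2 <- d1, then d1 <- d0).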
2232
Jessica Paquette809d7082017-07-28 03:21:58 +00002233void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2234 MachineBasicBlock::iterator I,
2235 const DebugLoc &DL, unsigned DestReg,
2236 unsigned SrcReg, bool KillSrc,
2237 unsigned Opcode,
2238 ArrayRef<unsigned> Indices) const {
2239 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
Eric Christophera0de2532015-03-18 20:37:30 +00002240 const TargetRegisterInfo *TRI = &getRegisterInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00002241 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2242 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2243 unsigned NumRegs = Indices.size();
2244
2245 int SubReg = 0, End = NumRegs, Incr = 1;
2246 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2247 SubReg = NumRegs - 1;
2248 End = -1;
2249 Incr = -1;
2250 }
2251
2252 for (; SubReg != End; SubReg += Incr) {
James Molloyf8aa57a2015-04-16 11:37:40 +00002253 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
Tim Northover3b0846e2014-05-24 12:50:23 +00002254 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2255 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2256 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2257 }
2258}
2259
2260void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00002261 MachineBasicBlock::iterator I,
2262 const DebugLoc &DL, unsigned DestReg,
2263 unsigned SrcReg, bool KillSrc) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00002264 if (AArch64::GPR32spRegClass.contains(DestReg) &&
2265 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
Eric Christophera0de2532015-03-18 20:37:30 +00002266 const TargetRegisterInfo *TRI = &getRegisterInfo();
2267
Tim Northover3b0846e2014-05-24 12:50:23 +00002268 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2269 // If either operand is WSP, expand to ADD #0.
2270 if (Subtarget.hasZeroCycleRegMove()) {
2271 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2272 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2273 &AArch64::GPR64spRegClass);
2274 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2275 &AArch64::GPR64spRegClass);
2276 // This instruction is reading and writing X registers. This may upset
2277 // the register scavenger and machine verifier, so we need to indicate
2278 // that we are reading an undefined value from SrcRegX, but a proper
2279 // value from SrcReg.
2280 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2281 .addReg(SrcRegX, RegState::Undef)
2282 .addImm(0)
2283 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2284 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2285 } else {
2286 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2287 .addReg(SrcReg, getKillRegState(KillSrc))
2288 .addImm(0)
2289 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2290 }
2291 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002292 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2293 .addImm(0)
2294 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
Tim Northover3b0846e2014-05-24 12:50:23 +00002295 } else {
2296 if (Subtarget.hasZeroCycleRegMove()) {
2297 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2298 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2299 &AArch64::GPR64spRegClass);
2300 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2301 &AArch64::GPR64spRegClass);
2302 // This instruction is reading and writing X registers. This may upset
2303 // the register scavenger and machine verifier, so we need to indicate
2304 // that we are reading an undefined value from SrcRegX, but a proper
2305 // value from SrcReg.
2306 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2307 .addReg(AArch64::XZR)
2308 .addReg(SrcRegX, RegState::Undef)
2309 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2310 } else {
2311 // Otherwise, expand to ORR WZR.
2312 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2313 .addReg(AArch64::WZR)
2314 .addReg(SrcReg, getKillRegState(KillSrc));
2315 }
2316 }
2317 return;
2318 }
2319
2320 if (AArch64::GPR64spRegClass.contains(DestReg) &&
2321 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2322 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2323 // If either operand is SP, expand to ADD #0.
2324 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2325 .addReg(SrcReg, getKillRegState(KillSrc))
2326 .addImm(0)
2327 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2328 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002329 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2330 .addImm(0)
2331 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
Tim Northover3b0846e2014-05-24 12:50:23 +00002332 } else {
2333 // Otherwise, expand to ORR XZR.
2334 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2335 .addReg(AArch64::XZR)
2336 .addReg(SrcReg, getKillRegState(KillSrc));
2337 }
2338 return;
2339 }
2340
2341 // Copy a DDDD register quad by copying the individual sub-registers.
2342 if (AArch64::DDDDRegClass.contains(DestReg) &&
2343 AArch64::DDDDRegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002344 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2345 AArch64::dsub2, AArch64::dsub3};
Tim Northover3b0846e2014-05-24 12:50:23 +00002346 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2347 Indices);
2348 return;
2349 }
2350
2351 // Copy a DDD register triple by copying the individual sub-registers.
2352 if (AArch64::DDDRegClass.contains(DestReg) &&
2353 AArch64::DDDRegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002354 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2355 AArch64::dsub2};
Tim Northover3b0846e2014-05-24 12:50:23 +00002356 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2357 Indices);
2358 return;
2359 }
2360
2361 // Copy a DD register pair by copying the individual sub-registers.
2362 if (AArch64::DDRegClass.contains(DestReg) &&
2363 AArch64::DDRegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002364 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
Tim Northover3b0846e2014-05-24 12:50:23 +00002365 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2366 Indices);
2367 return;
2368 }
2369
2370 // Copy a QQQQ register quad by copying the individual sub-registers.
2371 if (AArch64::QQQQRegClass.contains(DestReg) &&
2372 AArch64::QQQQRegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002373 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2374 AArch64::qsub2, AArch64::qsub3};
Tim Northover3b0846e2014-05-24 12:50:23 +00002375 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2376 Indices);
2377 return;
2378 }
2379
2380 // Copy a QQQ register triple by copying the individual sub-registers.
2381 if (AArch64::QQQRegClass.contains(DestReg) &&
2382 AArch64::QQQRegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002383 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2384 AArch64::qsub2};
Tim Northover3b0846e2014-05-24 12:50:23 +00002385 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2386 Indices);
2387 return;
2388 }
2389
2390 // Copy a QQ register pair by copying the individual sub-registers.
2391 if (AArch64::QQRegClass.contains(DestReg) &&
2392 AArch64::QQRegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002393 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
Tim Northover3b0846e2014-05-24 12:50:23 +00002394 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2395 Indices);
2396 return;
2397 }
2398
2399 if (AArch64::FPR128RegClass.contains(DestReg) &&
2400 AArch64::FPR128RegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002401 if (Subtarget.hasNEON()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00002402 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2403 .addReg(SrcReg)
2404 .addReg(SrcReg, getKillRegState(KillSrc));
2405 } else {
2406 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
Jessica Paquette809d7082017-07-28 03:21:58 +00002407 .addReg(AArch64::SP, RegState::Define)
2408 .addReg(SrcReg, getKillRegState(KillSrc))
2409 .addReg(AArch64::SP)
2410 .addImm(-16);
Tim Northover3b0846e2014-05-24 12:50:23 +00002411 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
Jessica Paquette809d7082017-07-28 03:21:58 +00002412 .addReg(AArch64::SP, RegState::Define)
2413 .addReg(DestReg, RegState::Define)
2414 .addReg(AArch64::SP)
2415 .addImm(16);
Tim Northover3b0846e2014-05-24 12:50:23 +00002416 }
2417 return;
2418 }
2419
2420 if (AArch64::FPR64RegClass.contains(DestReg) &&
2421 AArch64::FPR64RegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002422 if (Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002423 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2424 &AArch64::FPR128RegClass);
2425 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2426 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002427 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2428 .addReg(SrcReg)
2429 .addReg(SrcReg, getKillRegState(KillSrc));
2430 } else {
2431 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2432 .addReg(SrcReg, getKillRegState(KillSrc));
2433 }
2434 return;
2435 }
2436
2437 if (AArch64::FPR32RegClass.contains(DestReg) &&
2438 AArch64::FPR32RegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002439 if (Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002440 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2441 &AArch64::FPR128RegClass);
2442 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2443 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002444 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2445 .addReg(SrcReg)
2446 .addReg(SrcReg, getKillRegState(KillSrc));
2447 } else {
2448 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2449 .addReg(SrcReg, getKillRegState(KillSrc));
2450 }
2451 return;
2452 }
2453
2454 if (AArch64::FPR16RegClass.contains(DestReg) &&
2455 AArch64::FPR16RegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002456 if (Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002457 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2458 &AArch64::FPR128RegClass);
2459 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2460 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002461 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2462 .addReg(SrcReg)
2463 .addReg(SrcReg, getKillRegState(KillSrc));
2464 } else {
Eric Christophera0de2532015-03-18 20:37:30 +00002465 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2466 &AArch64::FPR32RegClass);
2467 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2468 &AArch64::FPR32RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002469 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2470 .addReg(SrcReg, getKillRegState(KillSrc));
2471 }
2472 return;
2473 }
2474
2475 if (AArch64::FPR8RegClass.contains(DestReg) &&
2476 AArch64::FPR8RegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002477 if (Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002478 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
Tim Northover3b0846e2014-05-24 12:50:23 +00002479 &AArch64::FPR128RegClass);
Eric Christophera0de2532015-03-18 20:37:30 +00002480 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2481 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002482 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2483 .addReg(SrcReg)
2484 .addReg(SrcReg, getKillRegState(KillSrc));
2485 } else {
Eric Christophera0de2532015-03-18 20:37:30 +00002486 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2487 &AArch64::FPR32RegClass);
2488 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2489 &AArch64::FPR32RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002490 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2491 .addReg(SrcReg, getKillRegState(KillSrc));
2492 }
2493 return;
2494 }
2495
2496 // Copies between GPR64 and FPR64.
2497 if (AArch64::FPR64RegClass.contains(DestReg) &&
2498 AArch64::GPR64RegClass.contains(SrcReg)) {
2499 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2500 .addReg(SrcReg, getKillRegState(KillSrc));
2501 return;
2502 }
2503 if (AArch64::GPR64RegClass.contains(DestReg) &&
2504 AArch64::FPR64RegClass.contains(SrcReg)) {
2505 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2506 .addReg(SrcReg, getKillRegState(KillSrc));
2507 return;
2508 }
2509 // Copies between GPR32 and FPR32.
2510 if (AArch64::FPR32RegClass.contains(DestReg) &&
2511 AArch64::GPR32RegClass.contains(SrcReg)) {
2512 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2513 .addReg(SrcReg, getKillRegState(KillSrc));
2514 return;
2515 }
2516 if (AArch64::GPR32RegClass.contains(DestReg) &&
2517 AArch64::FPR32RegClass.contains(SrcReg)) {
2518 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2519 .addReg(SrcReg, getKillRegState(KillSrc));
2520 return;
2521 }
2522
Tim Northover1bed9af2014-05-27 12:16:02 +00002523 if (DestReg == AArch64::NZCV) {
2524 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2525 BuildMI(MBB, I, DL, get(AArch64::MSR))
Jessica Paquette809d7082017-07-28 03:21:58 +00002526 .addImm(AArch64SysReg::NZCV)
2527 .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

  llvm_unreachable("unimplemented reg-to-reg copy");
}

void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
          .addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
                  getKillRegState(isKill))
          .addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
                  getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      return;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}
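
// For example (illustrative, not part of the upstream source): spilling a
// virtual GPR64 register to frame index 0 yields "STRXui %0, %stack.0, 0",
// while a QQ register tuple is stored with ST1Twov2d and no immediate
// operand, since the ST1 forms cannot encode an offset.
// loadRegFromStackSlot below mirrors this with LDRXui / LD1Twov2d.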

void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
          .addReg(TRI->getSubReg(DestReg, AArch64::sube64),
                  getDefRegState(true))
          .addReg(TRI->getSubReg(DestReg, AArch64::subo64),
                  getDefRegState(true))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      return;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned DestReg, unsigned SrcReg, int Offset,
                           const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV) {
  if (DestReg == SrcReg && Offset == 0)
    return;

  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
         "SP increment/decrement not 16-byte aligned");

  bool isSub = Offset < 0;
  if (isSub)
    Offset = -Offset;

  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
  // register can be loaded with offset%8 and the add/sub can use an extending
  // instruction with LSL#3.
  // Currently the function handles any offsets but generates a poor sequence
  // of code.
  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
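  // Illustrative expansion (sketch): for Offset = 0x1234567 the loop below
  // peels off at most (0xfff << 12) per step, and a final ADD handles the
  // low 12 bits:
  //   ADD DestReg, SrcReg,  #0xfff, lsl #12   ; consumes 0xfff000
  //   ADD DestReg, DestReg, #0x235, lsl #12   ; consumes 0x235000
  //   ADD DestReg, DestReg, #0x567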

  unsigned Opc;
  if (SetNZCV)
    Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
  else
    Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
  const unsigned MaxEncoding = 0xfff;
  const unsigned ShiftSize = 12;
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  while (((unsigned)Offset) >= (1 << ShiftSize)) {
    unsigned ThisVal;
    if (((unsigned)Offset) > MaxEncodableValue) {
      ThisVal = MaxEncodableValue;
    } else {
      ThisVal = Offset & MaxEncodableValue;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");
    BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
        .addReg(SrcReg)
        .addImm(ThisVal >> ShiftSize)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
        .setMIFlag(Flag);

    SrcReg = DestReg;
    Offset -= ThisVal;
    if (Offset == 0)
      return;
  }
  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
      .addReg(SrcReg)
      .addImm(Offset)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlag(Flag);
}

MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex,
    LiveIntervals *LIS) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %0 = COPY %sp; GPR64all:%0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %0 register class here.
  //
  // <rdar://problem/11522048>
  //
  if (MI.isFullCopy()) {
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned SrcReg = MI.getOperand(1).getReg();
    if (SrcReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Handle the case where a copy is being spilled or filled but the source
  // and destination register classes don't match. For example:
  //
  //   %0 = COPY %xzr; GPR64common:%0
  //
  // In this case we can still safely fold away the COPY and generate the
  // following spill code:
  //
  //   STRXui %xzr, %stack.0
  //
  // This also eliminates spilled cross register class COPYs (e.g. between x
  // and d regs) of the same size. For example:
  //
  //   %0 = COPY %1; GPR64:%0, FPR64:%1
  //
  // will be filled as
  //
  //   LDRDui %0, fi<#0>
  //
  // instead of
  //
  //   LDRXui %Temp, fi<#0>
  //   %0 = FMOV %Temp
  //
  if (MI.isCopy() && Ops.size() == 1 &&
      // Make sure we're only folding the explicit COPY defs/uses.
      (Ops[0] == 0 || Ops[0] == 1)) {
    bool IsSpill = Ops[0] == 0;
    bool IsFill = !IsSpill;
    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    MachineBasicBlock &MBB = *MI.getParent();
    const MachineOperand &DstMO = MI.getOperand(0);
    const MachineOperand &SrcMO = MI.getOperand(1);
    unsigned DstReg = DstMO.getReg();
    unsigned SrcReg = SrcMO.getReg();
    // This is slightly expensive to compute for physical regs since
    // getMinimalPhysRegClass is slow.
    auto getRegClass = [&](unsigned Reg) {
      return TargetRegisterInfo::isVirtualRegister(Reg)
                 ? MRI.getRegClass(Reg)
                 : TRI.getMinimalPhysRegClass(Reg);
    };

    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
      assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
             "Mismatched register size in non subreg COPY");
      if (IsSpill)
        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
                            getRegClass(SrcReg), &TRI);
      else
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
                             getRegClass(DstReg), &TRI);
      return &*--InsertPt;
    }

    // Handle cases like spilling def of:
    //
    //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
    //
    // where the physical register source can be widened and stored to the full
    // virtual reg destination stack slot, in this case producing:
    //
    //   STRXui %xzr, %stack.0
    //
    if (IsSpill && DstMO.isUndef() &&
        TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
      assert(SrcMO.getSubReg() == 0 &&
             "Unexpected subreg on physical register");
      const TargetRegisterClass *SpillRC;
      unsigned SpillSubreg;
      switch (DstMO.getSubReg()) {
      default:
        SpillRC = nullptr;
        break;
      case AArch64::sub_32:
      case AArch64::ssub:
        if (AArch64::GPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::GPR64RegClass;
          SpillSubreg = AArch64::sub_32;
        } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR64RegClass;
          SpillSubreg = AArch64::ssub;
        } else
          SpillRC = nullptr;
        break;
      case AArch64::dsub:
        if (AArch64::FPR64RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR128RegClass;
          SpillSubreg = AArch64::dsub;
        } else
          SpillRC = nullptr;
        break;
      }

      if (SpillRC)
        if (unsigned WidenedSrcReg =
                TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
          storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
                              FrameIndex, SpillRC, &TRI);
          return &*--InsertPt;
        }
    }

    // Handle cases like filling use of:
    //
    //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
    //
    // where we can load the full virtual reg source stack slot into the
    // subreg destination, in this case producing:
    //
    //   LDRWui %0:sub_32<def,read-undef>, %stack.0
    //
    if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
      const TargetRegisterClass *FillRC;
      switch (DstMO.getSubReg()) {
      default:
        FillRC = nullptr;
        break;
      case AArch64::sub_32:
        FillRC = &AArch64::GPR32RegClass;
        break;
      case AArch64::ssub:
        FillRC = &AArch64::FPR32RegClass;
        break;
      case AArch64::dsub:
        FillRC = &AArch64::FPR64RegClass;
        break;
      }

      if (FillRC) {
        assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
                   TRI.getRegSizeInBits(*FillRC) &&
               "Mismatched regclass size on folded subreg COPY");
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
        MachineInstr &LoadMI = *--InsertPt;
        MachineOperand &LoadDst = LoadMI.getOperand(0);
        assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
        LoadDst.setSubReg(DstMO.getSubReg());
        LoadDst.setIsUndef();
        return &LoadMI;
      }
    }
  }

  // Cannot fold.
  return nullptr;
}

int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int *EmittableOffset) {
  int Scale = 1;
  bool IsSigned = false;
  // The ImmIdx should be changed case by case if it is not 2.
  unsigned ImmIdx = 2;
  unsigned UnscaledOp = 0;
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
  // Vector spills/fills can't take an immediate offset.
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
    return AArch64FrameOffsetCannotUpdate;
  case AArch64::PRFMui:
    Scale = 8;
    UnscaledOp = AArch64::PRFUMi;
    break;
  case AArch64::LDRXui:
    Scale = 8;
    UnscaledOp = AArch64::LDURXi;
    break;
  case AArch64::LDRWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURWi;
    break;
  case AArch64::LDRBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBi;
    break;
  case AArch64::LDRHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHi;
    break;
  case AArch64::LDRSui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSi;
    break;
  case AArch64::LDRDui:
    Scale = 8;
    UnscaledOp = AArch64::LDURDi;
    break;
  case AArch64::LDRQui:
    Scale = 16;
    UnscaledOp = AArch64::LDURQi;
    break;
  case AArch64::LDRBBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBBi;
    break;
  case AArch64::LDRHHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHHi;
    break;
  case AArch64::LDRSBXui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBXi;
    break;
  case AArch64::LDRSBWui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBWi;
    break;
  case AArch64::LDRSHXui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHXi;
    break;
  case AArch64::LDRSHWui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHWi;
    break;
  case AArch64::LDRSWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSWi;
    break;

  case AArch64::STRXui:
    Scale = 8;
    UnscaledOp = AArch64::STURXi;
    break;
  case AArch64::STRWui:
    Scale = 4;
    UnscaledOp = AArch64::STURWi;
    break;
  case AArch64::STRBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBi;
    break;
  case AArch64::STRHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHi;
    break;
  case AArch64::STRSui:
    Scale = 4;
    UnscaledOp = AArch64::STURSi;
    break;
  case AArch64::STRDui:
    Scale = 8;
    UnscaledOp = AArch64::STURDi;
    break;
  case AArch64::STRQui:
    Scale = 16;
    UnscaledOp = AArch64::STURQi;
    break;
  case AArch64::STRBBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBBi;
    break;
  case AArch64::STRHHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHHi;
    break;

  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 8;
    break;
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::LDNPQi:
  case AArch64::STNPQi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 16;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 4;
    break;

  case AArch64::LDURXi:
  case AArch64::LDURWi:
  case AArch64::LDURBi:
  case AArch64::LDURHi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSWi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::STURBi:
  case AArch64::STURHi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
    Scale = 1;
    break;
  }

  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset (and have an unscaled op to use).
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Use an unscaled addressing mode if the instruction has a negative offset
  // (or if the instruction is already using an unscaled addressing mode).
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }
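
  // Worked example (illustrative): for LDRXui (Scale = 8, MaskBits = 12) an
  // incoming Offset of 40 scales to an emittable immediate of 5, while an
  // Offset of 7 is misaligned and switches to LDURXi (MaskBits = 9, signed,
  // Scale = 1), where 7 is directly encodable.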

  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}

bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }
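  // Illustrative sketch: an "ADDXri %vreg, <fi#0>, 16" whose frame index
  // resolves through FrameReg folds entirely into the ADD/SUB sequence that
  // emitFrameOffset produces, and the original ADD is erased above.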

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}

void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::createImm(0));
}

// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }

// True when Opc sets flags.
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// FP Opcodes that can be combined with an FMUL.
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    return (Options.UnsafeFPMath ||
            Options.AllowFPOpFusion == FPOpFusion::Fast);
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

//
// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // Must only be used by the user we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  case AArch64::FADDDrr:
  case AArch64::FADDSrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FMULDrr:
  case AArch64::FMULSrr:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv2f64:
  case AArch64::FMULXv4f32:
  case AArch64::FMULv2f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}
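
// Note (illustrative): the generic MachineCombiner consults the hook above
// when reassociating, e.g. rewriting a serial chain ((a + b) + c) + d as
// (a + b) + (c + d) to shorten the critical path, which is why these
// opcodes are gated on unsafe FP math.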

/// Find instructions that can be turned into madd.
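/// An illustrative match (sketch): since MUL is represented as MADD with a
/// zero register,
///   %3 = MADDWrrr %1, %2, %wzr   ; i.e. %3 = %1 * %2
///   %5 = ADDWrr %3, %4
/// has a combinable MUL feeding operand 1 of the ADD, so the MULADDW_OP1
/// pattern is recorded for it.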
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // When NZCV is live bail out.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertToNonFlagSettingOpc(Root);
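    // E.g. ADDSWrr is rewritten as ADDWrr here, since the flag result is
    // known to be dead.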
    // When opcode can't change bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}
/// Floating-Point Support

/// Find instructions that can be turned into fmadd.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {

  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
      Found = true;
    }
    break;

  case AArch64::FSUBSrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}

/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool AArch64InstrInfo::isThroughputPattern(
    MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FNMULSUBS_OP1:
  case MachineCombinerPattern::FNMULSUBD_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the
/// pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}

enum class FMAInstKind { Default, Indexed, Accumulator };
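// How genFusedMultiply below builds each kind (illustrative):
//   Default:     MADD/FMADD Rd, Rn, Rm, Ra -- multiplicands first, addend last
//   Indexed:     FMLA Vd, Vn, Vm[lane]     -- addend is the tied first source;
//                                             the lane immediate is copied
//                                             from the FMUL being folded
//   Accumulator: FMLA Vd, Vn, Vm           -- addend is the tied first source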
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B,0
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind Kind of FMA instruction (addressing mode) to be generated
/// \param ReplacedAddend is the result register from the instruction
/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default,
                 const unsigned *ReplacedAddend = nullptr) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  unsigned SrcReg2;
  bool Src2IsKill;
  if (ReplacedAddend) {
    // If we just generated a new addend, we must be its only use.
    SrcReg2 = *ReplacedAddend;
    Src2IsKill = true;
  } else {
    SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
    Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
  }

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind \n");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
  InsInstrs.push_back(MIB);
  return MUL;
}
3903
3904/// genMaddR - Generate madd instruction and combine mul and add using
3905/// an extra virtual register
3906/// Example - an ADD intermediate needs to be stored in a register:
3907/// MUL I=A,B,0
3908/// ADD R,I,Imm
3909/// ==> ORR V, ZR, Imm
3910/// ==> MADD R,A,B,V
Joel Jones7466ccf2017-07-10 22:11:50 +00003911/// \param MF Containing MachineFunction
3912/// \param MRI Register information
3913/// \param TII Target information
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003914/// \param Root is the ADD instruction
NAKAMURA Takumi40da2672014-08-08 02:04:18 +00003915/// \param [out] InsInstrs is a vector of machine instructions and will
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003916/// contain the generated madd instruction
3917/// \param IdxMulOpd is index of operand in Root that is the result of
3918/// the MUL. In the example above IdxMulOpd is 1.
3919/// \param MaddOpc the opcode fo the madd instruction
3920/// \param VR is a virtual register that holds the value of an ADD operand
3921/// (V in the example above).
Joel Jones7466ccf2017-07-10 22:11:50 +00003922/// \param RC Register class of operands
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003923static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
3924 const TargetInstrInfo *TII, MachineInstr &Root,
3925 SmallVectorImpl<MachineInstr *> &InsInstrs,
Jessica Paquette809d7082017-07-28 03:21:58 +00003926 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
3927 const TargetRegisterClass *RC) {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003928 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3929
3930 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003931 unsigned ResultReg = Root.getOperand(0).getReg();
3932 unsigned SrcReg0 = MUL->getOperand(1).getReg();
3933 bool Src0IsKill = MUL->getOperand(1).isKill();
3934 unsigned SrcReg1 = MUL->getOperand(2).getReg();
3935 bool Src1IsKill = MUL->getOperand(2).isKill();
3936
3937 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3938 MRI.constrainRegClass(ResultReg, RC);
3939 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3940 MRI.constrainRegClass(SrcReg0, RC);
3941 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3942 MRI.constrainRegClass(SrcReg1, RC);
3943 if (TargetRegisterInfo::isVirtualRegister(VR))
3944 MRI.constrainRegClass(VR, RC);
3945
Jessica Paquette809d7082017-07-28 03:21:58 +00003946 MachineInstrBuilder MIB =
3947 BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3948 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3949 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3950 .addReg(VR);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003951 // Insert the MADD
3952 InsInstrs.push_back(MIB);
3953 return MUL;
3954}
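// A minimal worked example of the rewrite genMaddR enables (registers are
// hypothetical): assuming the combiner matched MULADDWI_OP1 on
//
//   mul  w8, w0, w1
//   add  w0, w8, #16
//
// the add's immediate is materialized with an ORR from the zero register,
// since 0x10 is a valid logical immediate, giving
//
//   orr  w9, wzr, #16
//   madd w0, w0, w1, w9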
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003955
Sanjay Patelcfe03932015-06-19 23:21:42 +00003956/// When getMachineCombinerPatterns() finds potential patterns,
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003957/// this function generates the instructions that could replace the
3958/// original code sequence
3959void AArch64InstrInfo::genAlternativeCodeSequence(
Sanjay Patel387e66e2015-11-05 19:34:57 +00003960 MachineInstr &Root, MachineCombinerPattern Pattern,
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003961 SmallVectorImpl<MachineInstr *> &InsInstrs,
3962 SmallVectorImpl<MachineInstr *> &DelInstrs,
3963 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
3964 MachineBasicBlock &MBB = *Root.getParent();
3965 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3966 MachineFunction &MF = *MBB.getParent();
Eric Christophere0818912014-09-03 20:36:26 +00003967 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003968
3969 MachineInstr *MUL;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003970 const TargetRegisterClass *RC;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003971 unsigned Opc;
3972 switch (Pattern) {
3973 default:
Haicheng Wu08b94622016-01-07 04:01:02 +00003974 // Reassociate instructions.
3975 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
3976 DelInstrs, InstrIdxForVirtReg);
3977 return;
Sanjay Patel387e66e2015-11-05 19:34:57 +00003978 case MachineCombinerPattern::MULADDW_OP1:
3979 case MachineCombinerPattern::MULADDX_OP1:
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003980 // MUL I=A,B,0
3981 // ADD R,I,C
3982 // ==> MADD R,A,B,C
3983 // --- Create(MADD);
Sanjay Patel387e66e2015-11-05 19:34:57 +00003984 if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003985 Opc = AArch64::MADDWrrr;
3986 RC = &AArch64::GPR32RegClass;
3987 } else {
3988 Opc = AArch64::MADDXrrr;
3989 RC = &AArch64::GPR64RegClass;
3990 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003991 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003992 break;
Sanjay Patel387e66e2015-11-05 19:34:57 +00003993 case MachineCombinerPattern::MULADDW_OP2:
3994 case MachineCombinerPattern::MULADDX_OP2:
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003995 // MUL I=A,B,0
3996 // ADD R,C,I
3997 // ==> MADD R,A,B,C
3998 // --- Create(MADD);
Sanjay Patel387e66e2015-11-05 19:34:57 +00003999 if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004000 Opc = AArch64::MADDWrrr;
4001 RC = &AArch64::GPR32RegClass;
4002 } else {
4003 Opc = AArch64::MADDXrrr;
4004 RC = &AArch64::GPR64RegClass;
4005 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004006 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004007 break;
Sanjay Patel387e66e2015-11-05 19:34:57 +00004008 case MachineCombinerPattern::MULADDWI_OP1:
4009 case MachineCombinerPattern::MULADDXI_OP1: {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004010 // MUL I=A,B,0
4011 // ADD R,I,Imm
4012 // ==> ORR V, ZR, Imm
4013 // ==> MADD R,A,B,V
4014 // --- Create(MADD);
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004015 const TargetRegisterClass *OrrRC;
4016 unsigned BitSize, OrrOpc, ZeroReg;
Sanjay Patel387e66e2015-11-05 19:34:57 +00004017 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004018 OrrOpc = AArch64::ORRWri;
4019 OrrRC = &AArch64::GPR32spRegClass;
4020 BitSize = 32;
4021 ZeroReg = AArch64::WZR;
4022 Opc = AArch64::MADDWrrr;
4023 RC = &AArch64::GPR32RegClass;
4024 } else {
4025 OrrOpc = AArch64::ORRXri;
4026 OrrRC = &AArch64::GPR64spRegClass;
4027 BitSize = 64;
4028 ZeroReg = AArch64::XZR;
4029 Opc = AArch64::MADDXrrr;
4030 RC = &AArch64::GPR64RegClass;
4031 }
4032 unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4033 uint64_t Imm = Root.getOperand(2).getImm();
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004034
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004035 if (Root.getOperand(3).isImm()) {
4036 unsigned Val = Root.getOperand(3).getImm();
4037 Imm = Imm << Val;
4038 }
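// For example (hypothetical operands): an ADD with a shifted immediate such
// as `add w0, w8, #1, lsl #12` reaches here with Imm = 1 and Val = 12, so the
// effective addend becomes 0x1000 -- itself a valid logical immediate, which
// lets the ORR materialization below succeed.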
David Majnemer1182dd82016-07-21 23:46:56 +00004039 uint64_t UImm = SignExtend64(Imm, BitSize);
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004040 uint64_t Encoding;
4041 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4042 MachineInstrBuilder MIB1 =
4043 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4044 .addReg(ZeroReg)
4045 .addImm(Encoding);
4046 InsInstrs.push_back(MIB1);
4047 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4048 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004049 }
4050 break;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004051 }
Sanjay Patel387e66e2015-11-05 19:34:57 +00004052 case MachineCombinerPattern::MULSUBW_OP1:
4053 case MachineCombinerPattern::MULSUBX_OP1: {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004054 // MUL I=A,B,0
4055 // SUB R,I, C
4056 // ==> SUB V, 0, C
4057 // ==> MADD R,A,B,V // = -C + A*B
4058 // --- Create(MADD);
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004059 const TargetRegisterClass *SubRC;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004060 unsigned SubOpc, ZeroReg;
Sanjay Patel387e66e2015-11-05 19:34:57 +00004061 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004062 SubOpc = AArch64::SUBWrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004063 SubRC = &AArch64::GPR32spRegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004064 ZeroReg = AArch64::WZR;
4065 Opc = AArch64::MADDWrrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004066 RC = &AArch64::GPR32RegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004067 } else {
4068 SubOpc = AArch64::SUBXrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004069 SubRC = &AArch64::GPR64spRegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004070 ZeroReg = AArch64::XZR;
4071 Opc = AArch64::MADDXrrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004072 RC = &AArch64::GPR64RegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004073 }
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004074 unsigned NewVR = MRI.createVirtualRegister(SubRC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004075 // SUB NewVR, 0, C
4076 MachineInstrBuilder MIB1 =
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004077 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004078 .addReg(ZeroReg)
Diana Picus116bbab2017-01-13 09:58:52 +00004079 .add(Root.getOperand(2));
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004080 InsInstrs.push_back(MIB1);
4081 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004082 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4083 break;
4084 }
Sanjay Patel387e66e2015-11-05 19:34:57 +00004085 case MachineCombinerPattern::MULSUBW_OP2:
4086 case MachineCombinerPattern::MULSUBX_OP2:
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004087 // MUL I=A,B,0
4088 // SUB R,C,I
4089 // ==> MSUB R,A,B,C (computes C - A*B)
4090 // --- Create(MSUB);
Sanjay Patel387e66e2015-11-05 19:34:57 +00004091 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004092 Opc = AArch64::MSUBWrrr;
4093 RC = &AArch64::GPR32RegClass;
4094 } else {
4095 Opc = AArch64::MSUBXrrr;
4096 RC = &AArch64::GPR64RegClass;
4097 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004098 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004099 break;
Sanjay Patel387e66e2015-11-05 19:34:57 +00004100 case MachineCombinerPattern::MULSUBWI_OP1:
4101 case MachineCombinerPattern::MULSUBXI_OP1: {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004102 // MUL I=A,B,0
4103 // SUB R,I, Imm
4104 // ==> ORR V, ZR, -Imm
4105 // ==> MADD R,A,B,V // = -Imm + A*B
4106 // --- Create(MADD);
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004107 const TargetRegisterClass *OrrRC;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004108 unsigned BitSize, OrrOpc, ZeroReg;
Sanjay Patel387e66e2015-11-05 19:34:57 +00004109 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
Juergen Ributzka25816b02014-08-30 06:16:26 +00004110 OrrOpc = AArch64::ORRWri;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004111 OrrRC = &AArch64::GPR32spRegClass;
4112 BitSize = 32;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004113 ZeroReg = AArch64::WZR;
4114 Opc = AArch64::MADDWrrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004115 RC = &AArch64::GPR32RegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004116 } else {
4117 OrrOpc = AArch64::ORRXri;
Juergen Ributzkaf9660f02014-11-04 22:20:07 +00004118 OrrRC = &AArch64::GPR64spRegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004119 BitSize = 64;
4120 ZeroReg = AArch64::XZR;
4121 Opc = AArch64::MADDXrrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004122 RC = &AArch64::GPR64RegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004123 }
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004124 unsigned NewVR = MRI.createVirtualRegister(OrrRC);
David Majnemer1182dd82016-07-21 23:46:56 +00004125 uint64_t Imm = Root.getOperand(2).getImm();
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004126 if (Root.getOperand(3).isImm()) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004127 unsigned Val = Root.getOperand(3).getImm();
4128 Imm = Imm << Val;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004129 }
David Majnemer1182dd82016-07-21 23:46:56 +00004130 uint64_t UImm = SignExtend64(-Imm, BitSize);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004131 uint64_t Encoding;
4132 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4133 MachineInstrBuilder MIB1 =
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004134 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004135 .addReg(ZeroReg)
4136 .addImm(Encoding);
4137 InsInstrs.push_back(MIB1);
4138 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004139 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004140 }
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004141 break;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004142 }
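// Sketch of the MULSUBWI_OP1 rewrite with hypothetical registers: for
//
//   mul w8, w0, w1
//   sub w0, w8, #2
//
// the negated immediate 0xfffffffe is still a valid logical immediate, so
// this becomes
//
//   orr  w9, wzr, #0xfffffffe
//   madd w0, w0, w1, w9        // = A*B - 2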
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004143 // Floating Point Support
4144 case MachineCombinerPattern::FMULADDS_OP1:
4145 case MachineCombinerPattern::FMULADDD_OP1:
4146 // MUL I=A,B,0
4147 // ADD R,I,C
4148 // ==> MADD R,A,B,C
4149 // --- Create(MADD);
4150 if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4151 Opc = AArch64::FMADDSrrr;
4152 RC = &AArch64::FPR32RegClass;
4153 } else {
4154 Opc = AArch64::FMADDDrrr;
4155 RC = &AArch64::FPR64RegClass;
4156 }
4157 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4158 break;
4159 case MachineCombinerPattern::FMULADDS_OP2:
4160 case MachineCombinerPattern::FMULADDD_OP2:
4161 // FMUL I=A,B,0
4162 // FADD R,C,I
4163 // ==> FMADD R,A,B,C
4164 // --- Create(FMADD);
4165 if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4166 Opc = AArch64::FMADDSrrr;
4167 RC = &AArch64::FPR32RegClass;
4168 } else {
4169 Opc = AArch64::FMADDDrrr;
4170 RC = &AArch64::FPR64RegClass;
4171 }
4172 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4173 break;
4174
4175 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4176 Opc = AArch64::FMLAv1i32_indexed;
4177 RC = &AArch64::FPR32RegClass;
4178 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4179 FMAInstKind::Indexed);
4180 break;
4181 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4182 Opc = AArch64::FMLAv1i32_indexed;
4183 RC = &AArch64::FPR32RegClass;
4184 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4185 FMAInstKind::Indexed);
4186 break;
4187
4188 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4189 Opc = AArch64::FMLAv1i64_indexed;
4190 RC = &AArch64::FPR64RegClass;
4191 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4192 FMAInstKind::Indexed);
4193 break;
4194 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4195 Opc = AArch64::FMLAv1i64_indexed;
4196 RC = &AArch64::FPR64RegClass;
4197 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4198 FMAInstKind::Indexed);
4199 break;
4200
4201 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4202 case MachineCombinerPattern::FMLAv2f32_OP1:
4203 RC = &AArch64::FPR64RegClass;
4204 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4205 Opc = AArch64::FMLAv2i32_indexed;
4206 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4207 FMAInstKind::Indexed);
4208 } else {
4209 Opc = AArch64::FMLAv2f32;
4210 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4211 FMAInstKind::Accumulator);
4212 }
4213 break;
4214 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4215 case MachineCombinerPattern::FMLAv2f32_OP2:
4216 RC = &AArch64::FPR64RegClass;
4217 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4218 Opc = AArch64::FMLAv2i32_indexed;
4219 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4220 FMAInstKind::Indexed);
4221 } else {
4222 Opc = AArch64::FMLAv2f32;
4223 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4224 FMAInstKind::Accumulator);
4225 }
4226 break;
4227
4228 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4229 case MachineCombinerPattern::FMLAv2f64_OP1:
4230 RC = &AArch64::FPR128RegClass;
4231 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4232 Opc = AArch64::FMLAv2i64_indexed;
4233 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4234 FMAInstKind::Indexed);
4235 } else {
4236 Opc = AArch64::FMLAv2f64;
4237 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4238 FMAInstKind::Accumulator);
4239 }
4240 break;
4241 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4242 case MachineCombinerPattern::FMLAv2f64_OP2:
4243 RC = &AArch64::FPR128RegClass;
4244 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4245 Opc = AArch64::FMLAv2i64_indexed;
4246 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4247 FMAInstKind::Indexed);
4248 } else {
4249 Opc = AArch64::FMLAv2f64;
4250 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4251 FMAInstKind::Accumulator);
4252 }
4253 break;
4254
4255 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4256 case MachineCombinerPattern::FMLAv4f32_OP1:
4257 RC = &AArch64::FPR128RegClass;
4258 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
4259 Opc = AArch64::FMLAv4i32_indexed;
4260 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4261 FMAInstKind::Indexed);
4262 } else {
4263 Opc = AArch64::FMLAv4f32;
4264 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4265 FMAInstKind::Accumulator);
4266 }
4267 break;
4268
4269 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4270 case MachineCombinerPattern::FMLAv4f32_OP2:
4271 RC = &AArch64::FPR128RegClass;
4272 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
4273 Opc = AArch64::FMLAv4i32_indexed;
4274 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4275 FMAInstKind::Indexed);
4276 } else {
4277 Opc = AArch64::FMLAv4f32;
4278 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4279 FMAInstKind::Accumulator);
4280 }
4281 break;
4282
4283 case MachineCombinerPattern::FMULSUBS_OP1:
4284 case MachineCombinerPattern::FMULSUBD_OP1: {
4285 // FMUL I=A,B,0
4286 // FSUB R,I,C
4287 // ==> FNMSUB R,A,B,C // = -C + A*B
4288 // --- Create(FNMSUB);
4289 if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4290 Opc = AArch64::FNMSUBSrrr;
4291 RC = &AArch64::FPR32RegClass;
4292 } else {
4293 Opc = AArch64::FNMSUBDrrr;
4294 RC = &AArch64::FPR64RegClass;
4295 }
4296 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4297 break;
4298 }
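// Illustration with hypothetical registers: FMULSUBS_OP1 turns
//
//   fmul s1, s2, s3
//   fsub s0, s1, s4
//
// into
//
//   fnmsub s0, s2, s3, s4   // = s2*s3 - s4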
Chad Rosieraeffffd2017-05-11 20:07:24 +00004299
4300 case MachineCombinerPattern::FNMULSUBS_OP1:
4301 case MachineCombinerPattern::FNMULSUBD_OP1: {
4302 // FNMUL I=A,B,0
4303 // FSUB R,I,C
4304 // ==> FNMADD R,A,B,C // = -A*B - C
4305 // --- Create(FNMADD);
4306 if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4307 Opc = AArch64::FNMADDSrrr;
4308 RC = &AArch64::FPR32RegClass;
4309 } else {
4310 Opc = AArch64::FNMADDDrrr;
4311 RC = &AArch64::FPR64RegClass;
4312 }
4313 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4314 break;
4315 }
4316
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004317 case MachineCombinerPattern::FMULSUBS_OP2:
4318 case MachineCombinerPattern::FMULSUBD_OP2: {
4319 // FMUL I=A,B,0
4320 // FSUB R,C,I
4321 // ==> FMSUB R,A,B,C (computes C - A*B)
4322 // --- Create(FMSUB);
4323 if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4324 Opc = AArch64::FMSUBSrrr;
4325 RC = &AArch64::FPR32RegClass;
4326 } else {
4327 Opc = AArch64::FMSUBDrrr;
4328 RC = &AArch64::FPR64RegClass;
4329 }
4330 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4331 break;
Chad Rosier8b12a032017-05-16 12:43:23 +00004332 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004333
4334 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4335 Opc = AArch64::FMLSv1i32_indexed;
4336 RC = &AArch64::FPR32RegClass;
4337 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4338 FMAInstKind::Indexed);
4339 break;
4340
4341 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4342 Opc = AArch64::FMLSv1i64_indexed;
4343 RC = &AArch64::FPR64RegClass;
4344 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4345 FMAInstKind::Indexed);
4346 break;
4347
4348 case MachineCombinerPattern::FMLSv2f32_OP2:
4349 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4350 RC = &AArch64::FPR64RegClass;
4351 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
4352 Opc = AArch64::FMLSv2i32_indexed;
4353 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4354 FMAInstKind::Indexed);
4355 } else {
4356 Opc = AArch64::FMLSv2f32;
4357 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4358 FMAInstKind::Accumulator);
4359 }
4360 break;
4361
4362 case MachineCombinerPattern::FMLSv2f64_OP2:
4363 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4364 RC = &AArch64::FPR128RegClass;
4365 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
4366 Opc = AArch64::FMLSv2i64_indexed;
4367 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4368 FMAInstKind::Indexed);
4369 } else {
4370 Opc = AArch64::FMLSv2f64;
4371 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4372 FMAInstKind::Accumulator);
4373 }
4374 break;
4375
4376 case MachineCombinerPattern::FMLSv4f32_OP2:
4377 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4378 RC = &AArch64::FPR128RegClass;
4379 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
4380 Opc = AArch64::FMLSv4i32_indexed;
4381 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4382 FMAInstKind::Indexed);
4383 } else {
4384 Opc = AArch64::FMLSv4f32;
4385 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4386 FMAInstKind::Accumulator);
4387 }
4388 break;
Florian Hahn5d6a4e42017-12-06 22:48:36 +00004389 case MachineCombinerPattern::FMLSv2f32_OP1:
4390 case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
4391 RC = &AArch64::FPR64RegClass;
4392 unsigned NewVR = MRI.createVirtualRegister(RC);
4393 MachineInstrBuilder MIB1 =
4394 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4395 .add(Root.getOperand(2));
4396 InsInstrs.push_back(MIB1);
4397 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4398 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
4399 Opc = AArch64::FMLAv2i32_indexed;
4400 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4401 FMAInstKind::Indexed, &NewVR);
4402 } else {
4403 Opc = AArch64::FMLAv2f32;
4404 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4405 FMAInstKind::Accumulator, &NewVR);
4406 }
4407 break;
4408 }
4409 case MachineCombinerPattern::FMLSv4f32_OP1:
4410 case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
4411 RC = &AArch64::FPR128RegClass;
4412 unsigned NewVR = MRI.createVirtualRegister(RC);
4413 MachineInstrBuilder MIB1 =
4414 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4415 .add(Root.getOperand(2));
4416 InsInstrs.push_back(MIB1);
4417 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4418 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
4419 Opc = AArch64::FMLAv4i32_indexed;
4420 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4421 FMAInstKind::Indexed, &NewVR);
4422 } else {
4423 Opc = AArch64::FMLAv4f32;
4424 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4425 FMAInstKind::Accumulator, &NewVR);
4426 }
4427 break;
4428 }
4429 case MachineCombinerPattern::FMLSv2f64_OP1:
4430 case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
4431 RC = &AArch64::FPR128RegClass;
4432 unsigned NewVR = MRI.createVirtualRegister(RC);
4433 MachineInstrBuilder MIB1 =
4434 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4435 .add(Root.getOperand(2));
4436 InsInstrs.push_back(MIB1);
4437 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4438 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
4439 Opc = AArch64::FMLAv2i64_indexed;
4440 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4441 FMAInstKind::Indexed, &NewVR);
4442 } else {
4443 Opc = AArch64::FMLAv2f64;
4444 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4445 FMAInstKind::Accumulator, &NewVR);
4446 }
4447 break;
4448 }
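// Sketch of the FMLS OP1 rewrites above, with hypothetical registers: no
// vector instruction computes A*B - C with C in the accumulator position, so
// the subtrahend is negated first and an FMLA accumulates into it:
//
//   fmul v8.2s, v0.2s, v1.2s          fneg v9.2s, v2.2s
//   fsub v2.2s, v8.2s, v2.2s   ==>    fmla v9.2s, v0.2s, v1.2s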
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004449 } // end switch (Pattern)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004450 // Record MUL and ADD/SUB for deletion
4451 DelInstrs.push_back(MUL);
4452 DelInstrs.push_back(&Root);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004453}
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004454
4455/// \brief Replace a csinc-branch sequence by a simple conditional branch
4456///
4457/// Examples:
Joel Jonesaff09bf2017-07-06 14:17:36 +00004458/// 1. \code
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004459/// csinc w9, wzr, wzr, <condition code>
4460/// tbnz w9, #0, 0x44
Joel Jonesaff09bf2017-07-06 14:17:36 +00004461/// \endcode
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004462/// to
Joel Jonesaff09bf2017-07-06 14:17:36 +00004463/// \code
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004464/// b.<inverted condition code>
Joel Jonesaff09bf2017-07-06 14:17:36 +00004465/// \endcode
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004466///
Joel Jonesaff09bf2017-07-06 14:17:36 +00004467/// 2. \code
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004468/// csinc w9, wzr, wzr, <condition code>
4469/// tbz w9, #0, 0x44
Joel Jonesaff09bf2017-07-06 14:17:36 +00004470/// \endcode
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004471/// to
Joel Jonesaff09bf2017-07-06 14:17:36 +00004472/// \code
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004473/// b.<condition code>
Joel Jonesaff09bf2017-07-06 14:17:36 +00004474/// \endcode
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004475///
Chad Rosier4aeab5f2016-03-21 13:43:58 +00004476/// Replace a compare-and-branch sequence by a TBZ/TBNZ instruction when the
4477/// compare's constant operand is a power of 2.
Balaram Makame9b27252016-03-10 17:54:55 +00004478///
4479/// Examples:
Joel Jonesaff09bf2017-07-06 14:17:36 +00004480/// \code
Balaram Makame9b27252016-03-10 17:54:55 +00004481/// and w8, w8, #0x400
4482/// cbnz w8, L1
Joel Jonesaff09bf2017-07-06 14:17:36 +00004483/// \endcode
Balaram Makame9b27252016-03-10 17:54:55 +00004484/// to
Joel Jonesaff09bf2017-07-06 14:17:36 +00004485/// \code
Balaram Makame9b27252016-03-10 17:54:55 +00004486/// tbnz w8, #10, L1
Joel Jonesaff09bf2017-07-06 14:17:36 +00004487/// \endcode
Balaram Makame9b27252016-03-10 17:54:55 +00004488///
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004489/// \param MI Conditional Branch
4490/// \return True when the simple conditional branch is generated
4491///
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004492bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004493 bool IsNegativeBranch = false;
4494 bool IsTestAndBranch = false;
4495 unsigned TargetBBInMI = 0;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004496 switch (MI.getOpcode()) {
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004497 default:
4498 llvm_unreachable("Unknown branch instruction?");
4499 case AArch64::Bcc:
4500 return false;
4501 case AArch64::CBZW:
4502 case AArch64::CBZX:
4503 TargetBBInMI = 1;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004504 break;
4505 case AArch64::CBNZW:
4506 case AArch64::CBNZX:
4507 TargetBBInMI = 1;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004508 IsNegativeBranch = true;
4509 break;
4510 case AArch64::TBZW:
4511 case AArch64::TBZX:
4512 TargetBBInMI = 2;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004513 IsTestAndBranch = true;
4514 break;
4515 case AArch64::TBNZW:
4516 case AArch64::TBNZX:
4517 TargetBBInMI = 2;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004518 IsNegativeBranch = true;
4519 IsTestAndBranch = true;
4520 break;
4521 }
4522 // So we increment a zero register and test for bits other
4523 // than bit 0? Conservatively bail out in case the verifier
4524 // missed this case.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004525 if (IsTestAndBranch && MI.getOperand(1).getImm())
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004526 return false;
4527
4528 // Find Definition.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004529 assert(MI.getParent() && "Incomplete machine instruction");
4530 MachineBasicBlock *MBB = MI.getParent();
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004531 MachineFunction *MF = MBB->getParent();
4532 MachineRegisterInfo *MRI = &MF->getRegInfo();
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004533 unsigned VReg = MI.getOperand(0).getReg();
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004534 if (!TargetRegisterInfo::isVirtualRegister(VReg))
4535 return false;
4536
4537 MachineInstr *DefMI = MRI->getVRegDef(VReg);
4538
Balaram Makame9b27252016-03-10 17:54:55 +00004539 // Look through COPY instructions to find definition.
4540 while (DefMI->isCopy()) {
4541 unsigned CopyVReg = DefMI->getOperand(1).getReg();
4542 if (!MRI->hasOneNonDBGUse(CopyVReg))
4543 return false;
4544 if (!MRI->hasOneDef(CopyVReg))
4545 return false;
4546 DefMI = MRI->getVRegDef(CopyVReg);
4547 }
4548
4549 switch (DefMI->getOpcode()) {
4550 default:
4551 return false;
4552 // Fold AND into a TBZ/TBNZ if the constant operand is a power of 2.
4553 case AArch64::ANDWri:
4554 case AArch64::ANDXri: {
4555 if (IsTestAndBranch)
4556 return false;
4557 if (DefMI->getParent() != MBB)
4558 return false;
4559 if (!MRI->hasOneNonDBGUse(VReg))
4560 return false;
4561
Quentin Colombetabe2d012016-04-25 20:54:08 +00004562 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
Balaram Makame9b27252016-03-10 17:54:55 +00004563 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
Quentin Colombetabe2d012016-04-25 20:54:08 +00004564 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
Balaram Makame9b27252016-03-10 17:54:55 +00004565 if (!isPowerOf2_64(Mask))
4566 return false;
4567
4568 MachineOperand &MO = DefMI->getOperand(1);
4569 unsigned NewReg = MO.getReg();
4570 if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4571 return false;
4572
4573 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4574
4575 MachineBasicBlock &RefToMBB = *MBB;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004576 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4577 DebugLoc DL = MI.getDebugLoc();
Balaram Makame9b27252016-03-10 17:54:55 +00004578 unsigned Imm = Log2_64(Mask);
Renato Golin179d1f52016-04-23 19:30:52 +00004579 unsigned Opc = (Imm < 32)
4580 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4581 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
Quentin Colombetabe2d012016-04-25 20:54:08 +00004582 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4583 .addReg(NewReg)
4584 .addImm(Imm)
4585 .addMBB(TBB);
Matthias Braune25bbd02016-05-03 04:54:16 +00004586 // The register lives on in the new TB(N)Z now, so clear its kill flag.
4587 MO.setIsKill(false);
Quentin Colombetabe2d012016-04-25 20:54:08 +00004588
4589 // For immediates smaller than 32, we need to use the 32-bit
4590 // variant (W) in all cases, because the 64-bit variant cannot
4591 // encode them.
4592 // Therefore, if the input register is 64-bit, we need to take the
4593 // 32-bit sub-part.
4594 if (!Is32Bit && Imm < 32)
4595 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004596 MI.eraseFromParent();
Balaram Makame9b27252016-03-10 17:54:55 +00004597 return true;
4598 }
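// As a concrete sketch (hypothetical registers): `and x8, x8, #0x10` feeding
// `cbnz x8, L1` becomes `tbnz w8, #4, L1`; since bit 4 is below 32, the W
// variant is used on the sub_32 sub-register of x8.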
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004599 // Look for CSINC
Balaram Makame9b27252016-03-10 17:54:55 +00004600 case AArch64::CSINCWr:
4601 case AArch64::CSINCXr: {
4602 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4603 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4604 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4605 DefMI->getOperand(2).getReg() == AArch64::XZR))
4606 return false;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004607
Balaram Makame9b27252016-03-10 17:54:55 +00004608 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4609 return false;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004610
Balaram Makame9b27252016-03-10 17:54:55 +00004611 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
Balaram Makame9b27252016-03-10 17:54:55 +00004612 // Convert only when the condition code is not modified between
4613 // the CSINC and the branch. The CC may be used by other
4614 // instructions in between.
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00004615 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
Balaram Makame9b27252016-03-10 17:54:55 +00004616 return false;
4617 MachineBasicBlock &RefToMBB = *MBB;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004618 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4619 DebugLoc DL = MI.getDebugLoc();
Balaram Makame9b27252016-03-10 17:54:55 +00004620 if (IsNegativeBranch)
4621 CC = AArch64CC::getInvertedCondCode(CC);
4622 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004623 MI.eraseFromParent();
Balaram Makame9b27252016-03-10 17:54:55 +00004624 return true;
4625 }
4626 }
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004627}
Alex Lorenzf3630112015-08-18 22:52:15 +00004628
4629std::pair<unsigned, unsigned>
4630AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4631 const unsigned Mask = AArch64II::MO_FRAGMENT;
4632 return std::make_pair(TF & Mask, TF & ~Mask);
4633}
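// For instance (the flag values follow from the tables below, but this
// particular combination is hypothetical): a target-flags word of
// MO_PAGEOFF | MO_NC decomposes into the direct flag MO_PAGEOFF and the
// bitmask flag MO_NC, since only the MO_FRAGMENT part is kept in the first
// element of the pair.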
4634
4635ArrayRef<std::pair<unsigned, const char *>>
4636AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4637 using namespace AArch64II;
Eugene Zelenko049b0172017-01-06 00:30:53 +00004638
Hal Finkel982e8d42015-08-30 08:07:29 +00004639 static const std::pair<unsigned, const char *> TargetFlags[] = {
Jessica Paquette809d7082017-07-28 03:21:58 +00004640 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4641 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
4642 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
Alex Lorenzf3630112015-08-18 22:52:15 +00004643 {MO_HI12, "aarch64-hi12"}};
4644 return makeArrayRef(TargetFlags);
4645}
4646
4647ArrayRef<std::pair<unsigned, const char *>>
4648AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4649 using namespace AArch64II;
Eugene Zelenko049b0172017-01-06 00:30:53 +00004650
Hal Finkel982e8d42015-08-30 08:07:29 +00004651 static const std::pair<unsigned, const char *> TargetFlags[] = {
Jessica Paquette809d7082017-07-28 03:21:58 +00004652 {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
Alex Lorenzf3630112015-08-18 22:52:15 +00004653 return makeArrayRef(TargetFlags);
4654}
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004655
Geoff Berry6748abe2017-07-13 02:28:54 +00004656ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4657AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4658 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
Geoff Berryb1e87142017-07-14 21:44:12 +00004659 {{MOSuppressPair, "aarch64-suppress-pair"},
4660 {MOStridedAccess, "aarch64-strided-access"}};
Geoff Berry6748abe2017-07-13 02:28:54 +00004661 return makeArrayRef(TargetFlags);
4662}
4663
Jessica Paquette02c124d2017-12-18 19:33:21 +00004664 /// Constants defining how certain sequences should be outlined.
4665 /// This encompasses how an outlined function should be called, and what kind of
4666 /// frame should be emitted for that outlined function.
4667 ///
4668 /// \p MachineOutlinerDefault implies that the function should be called with
4669 /// a save and restore of LR to the stack.
4670 ///
4671 /// That is,
4672 ///
4673 /// I1 Save LR OUTLINED_FUNCTION:
4674 /// I2 --> BL OUTLINED_FUNCTION I1
4675 /// I3 Restore LR I2
4676 /// I3
4677 /// RET
4678 ///
4679 /// * Call construction overhead: 3 (save + BL + restore)
4680 /// * Frame construction overhead: 1 (ret)
4681 /// * Requires stack fixups? Yes
4682 ///
4683 /// \p MachineOutlinerTailCall implies that the function is being created from
4684 /// a sequence of instructions ending in a return.
4685 ///
4686 /// That is,
4687 ///
4688 /// I1 OUTLINED_FUNCTION:
4689 /// I2 --> B OUTLINED_FUNCTION I1
4690 /// RET I2
4691 /// RET
4692 ///
4693 /// * Call construction overhead: 1 (B)
4694 /// * Frame construction overhead: 0 (Return included in sequence)
4695 /// * Requires stack fixups? No
4696 ///
4697 /// \p MachineOutlinerNoLRSave implies that the function should be called using
4698 /// a BL instruction, but doesn't require LR to be saved and restored. This
4699 /// happens when LR is known to be dead.
4700 ///
4701 /// That is,
4702 ///
4703 /// I1 OUTLINED_FUNCTION:
4704 /// I2 --> BL OUTLINED_FUNCTION I1
4705 /// I3 I2
4706 /// I3
4707 /// RET
4708 ///
4709 /// * Call construction overhead: 1 (BL)
4710 /// * Frame construction overhead: 1 (RET)
4711 /// * Requires stack fixups? No
4712 ///
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004713enum MachineOutlinerClass {
4714 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
4715 MachineOutlinerTailCall, /// Only emit a branch.
4716 MachineOutlinerNoLRSave /// Emit a call and return.
4717};
Jessica Paquetted87f5442017-07-29 02:55:46 +00004718
Jessica Paquette3291e732018-01-09 00:26:18 +00004719enum MachineOutlinerMBBFlags {
4720 LRUnavailableSomewhere = 0x2,
4721 HasCalls = 0x4
4722};
4723
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004724bool AArch64InstrInfo::canOutlineWithoutLRSave(
4725 MachineBasicBlock::iterator &CallInsertionPt) const {
4726 // Was LR saved in the function containing this basic block?
4727 MachineBasicBlock &MBB = *(CallInsertionPt->getParent());
4728 LiveRegUnits LRU(getRegisterInfo());
4729 LRU.addLiveOuts(MBB);
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004730
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004731 // Get liveness information from the end of the block to the end of the
4732 // prospective outlined region.
4733 std::for_each(MBB.rbegin(),
Jessica Paquette02c124d2017-12-18 19:33:21 +00004734 (MachineBasicBlock::reverse_iterator)CallInsertionPt,
4735 [&LRU](MachineInstr &MI) { LRU.stepBackward(MI); });
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004736
4737 // If the link register is available at this point, then we can safely outline
4738 // the region without saving/restoring LR. Otherwise, we must emit a save and
4739 // restore.
4740 return LRU.available(AArch64::LR);
Jessica Paquette809d7082017-07-28 03:21:58 +00004741}
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004742
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004743AArch64GenInstrInfo::MachineOutlinerInfo
4744AArch64InstrInfo::getOutliningCandidateInfo(
4745 std::vector<
4746 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
4747 &RepeatedSequenceLocs) const {
Jessica Paquette809d7082017-07-28 03:21:58 +00004748
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004749 unsigned CallID = MachineOutlinerDefault;
4750 unsigned FrameID = MachineOutlinerDefault;
4751 unsigned NumInstrsForCall = 3;
4752 unsigned NumInstrsToCreateFrame = 1;
Jessica Paquette809d7082017-07-28 03:21:58 +00004753
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004754 auto DoesntNeedLRSave =
4755 [this](std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>
4756 &I) { return canOutlineWithoutLRSave(I.second); };
4757
4758 // If the last instruction in any candidate is a terminator, then we should
4759 // tail call all of the candidates.
4760 if (RepeatedSequenceLocs[0].second->isTerminator()) {
4761 CallID = MachineOutlinerTailCall;
4762 FrameID = MachineOutlinerTailCall;
4763 NumInstrsForCall = 1;
4764 NumInstrsToCreateFrame = 0;
4765 }
4766
4767 else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4768 DoesntNeedLRSave)) {
4769 CallID = MachineOutlinerNoLRSave;
4770 FrameID = MachineOutlinerNoLRSave;
4771 NumInstrsForCall = 1;
4772 NumInstrsToCreateFrame = 1;
4773 }
4774
Jessica Paquette02c124d2017-12-18 19:33:21 +00004775 // Check if the range contains a call. These require a save + restore of the
4776 // link register.
4777 if (std::any_of(RepeatedSequenceLocs[0].first, RepeatedSequenceLocs[0].second,
4778 [](const MachineInstr &MI) { return MI.isCall(); }))
4779 NumInstrsToCreateFrame += 2; // Save + restore the link register.
4780
4781 // Handle the last instruction separately. If this is a tail call, then the
4782 // last instruction is a call. We don't want to save + restore in this case.
4783 // However, it could be possible that the last instruction is a call without
4784 // it being valid to tail call this sequence. We should consider this as well.
4785 else if (RepeatedSequenceLocs[0].second->isCall() &&
4786 FrameID != MachineOutlinerTailCall)
4787 NumInstrsToCreateFrame += 2;
4788
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004789 return MachineOutlinerInfo(NumInstrsForCall, NumInstrsToCreateFrame, CallID,
4790 FrameID);
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004791}
4792
Jessica Paquette02c124d2017-12-18 19:33:21 +00004793bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
4794 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
Matthias Braunf1caa282017-12-15 22:22:58 +00004795 const Function &F = MF.getFunction();
Jessica Paquette13593842017-10-07 00:16:34 +00004796
4797 // If F uses a redzone, then don't outline from it because it might mess up
4798 // the stack.
Matthias Braunf1caa282017-12-15 22:22:58 +00004799 if (!F.hasFnAttribute(Attribute::NoRedZone))
Jessica Paquette13593842017-10-07 00:16:34 +00004800 return false;
4801
Jessica Paquette13593842017-10-07 00:16:34 +00004802 // Can F be deduplicated by the linker? If it can, don't outline from it.
Matthias Braunf1caa282017-12-15 22:22:58 +00004803 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
Jessica Paquette13593842017-10-07 00:16:34 +00004804 return false;
Matthias Braunf1caa282017-12-15 22:22:58 +00004805
Jessica Paquette13593842017-10-07 00:16:34 +00004806 return true;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004807}
4808
Jessica Paquette3291e732018-01-09 00:26:18 +00004809unsigned
4810AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
4811 unsigned Flags = 0x0;
4812 // Check if there's a call inside this MachineBasicBlock. If there is, then
4813 // set a flag.
4814 if (std::any_of(MBB.begin(), MBB.end(),
4815 [](MachineInstr &MI) { return MI.isCall(); }))
4816 Flags |= MachineOutlinerMBBFlags::HasCalls;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004817
Jessica Paquette3291e732018-01-09 00:26:18 +00004818 // Check if LR is available through all of the MBB. If it's not, then set
4819 // a flag.
4820 LiveRegUnits LRU(getRegisterInfo());
4821 LRU.addLiveOuts(MBB);
4822
4823 std::for_each(MBB.rbegin(),
4824 MBB.rend(),
4825 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
4826
4827 if (!LRU.available(AArch64::LR))
4828 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
4829
4830 return Flags;
4831}
4832
4833AArch64GenInstrInfo::MachineOutlinerInstrType
4834AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
4835 unsigned Flags) const {
4836 MachineInstr &MI = *MIT;
4837 MachineBasicBlock *MBB = MI.getParent();
4838 MachineFunction *MF = MBB->getParent();
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004839 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
4840
4841 // Don't outline LOHs.
4842 if (FuncInfo->getLOHRelated().count(&MI))
4843 return MachineOutlinerInstrType::Illegal;
4844
4845 // Don't allow debug values to impact outlining type.
Jessica Paquette3291e732018-01-09 00:26:18 +00004846 if (MI.isDebugValue() || MI.isIndirectDebugValue())
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004847 return MachineOutlinerInstrType::Invisible;
Jessica Paquette3291e732018-01-09 00:26:18 +00004848
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004849 // Is this a terminator for a basic block?
4850 if (MI.isTerminator()) {
4851
4852 // Is this the end of a function?
4853 if (MI.getParent()->succ_empty())
Jessica Paquette809d7082017-07-28 03:21:58 +00004854 return MachineOutlinerInstrType::Legal;
Jessica Paquette3291e732018-01-09 00:26:18 +00004855
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004856 // It's not, so don't outline it.
4857 return MachineOutlinerInstrType::Illegal;
4858 }
4859
Jessica Paquettec191f102018-01-10 18:49:57 +00004860 // Special cases for instructions that can always be outlined but would fail
4861 // the later tests, e.g. ADRPs, which are PC-relative and may use LR, but can
4862 // always be outlined because they don't require a *specific* value to be in LR.
4863 if (MI.getOpcode() == AArch64::ADRP)
4864 return MachineOutlinerInstrType::Legal;
4865
Jessica Paquette02c124d2017-12-18 19:33:21 +00004866 // Outline calls without stack parameters or aggregate parameters.
4867 if (MI.isCall()) {
4868 const Module *M = MF->getFunction().getParent();
4869 assert(M && "No module?");
4870
4871 // Get the function associated with the call. Look at each operand and find
4872 // the one that represents the callee and get its name.
4873 Function *Callee = nullptr;
4874 for (const MachineOperand &MOP : MI.operands()) {
4875 if (MOP.isSymbol()) {
4876 Callee = M->getFunction(MOP.getSymbolName());
4877 break;
4878 }
4879
4880 else if (MOP.isGlobal()) {
4881 Callee = M->getFunction(MOP.getGlobal()->getGlobalIdentifier());
4882 break;
4883 }
4884 }
4885
4886 // Only handle functions that we have information about.
4887 if (!Callee)
4888 return MachineOutlinerInstrType::Illegal;
Jessica Paquette3291e732018-01-09 00:26:18 +00004889
Jessica Paquette02c124d2017-12-18 19:33:21 +00004890 // We have a function we have information about. Check if it's something we
4891 // can safely outline.
4892
4893 // If the callee is vararg, it passes parameters on the stack. Don't touch
4894 // it.
4895 // FIXME: Functions like printf are very common and we should be able to
4896 // outline them.
4897 if (Callee->isVarArg())
4898 return MachineOutlinerInstrType::Illegal;
4899
4900 // Check if any of the arguments are a pointer to a struct. We don't want
4901 // to outline these since they might be loaded in two instructions.
4902 for (Argument &Arg : Callee->args()) {
4903 if (Arg.getType()->isPointerTy() &&
Jessica Paquette3291e732018-01-09 00:26:18 +00004904 Arg.getType()->getPointerElementType()->isAggregateType())
Jessica Paquette02c124d2017-12-18 19:33:21 +00004905 return MachineOutlinerInstrType::Illegal;
4906 }
4907
4908 // If the thing we're calling doesn't access memory at all, then we're good
4909 // to go.
Jessica Paquette3291e732018-01-09 00:26:18 +00004910 if (Callee->doesNotAccessMemory())
Jessica Paquette02c124d2017-12-18 19:33:21 +00004911 return MachineOutlinerInstrType::Legal;
Jessica Paquette3291e732018-01-09 00:26:18 +00004912
Jessica Paquette02c124d2017-12-18 19:33:21 +00004913
4914 // It accesses memory. Get the machine function for the callee to see if
4915 // it's safe to outline.
4916 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
4917
4918 // We don't know what's going on with the callee at all. Don't touch it.
Jessica Paquette3291e732018-01-09 00:26:18 +00004919 if (!CalleeMF)
Jessica Paquette02c124d2017-12-18 19:33:21 +00004920 return MachineOutlinerInstrType::Illegal;
4921
4922 // Does it pass anything on the stack? If it does, don't outline it.
4923 if (CalleeMF->getInfo<AArch64FunctionInfo>()->getBytesInStackArgArea() != 0)
4924 return MachineOutlinerInstrType::Illegal;
Jessica Paquette3291e732018-01-09 00:26:18 +00004925
Jessica Paquette02c124d2017-12-18 19:33:21 +00004926 // It doesn't, so it's safe to outline and we're done.
4927 return MachineOutlinerInstrType::Legal;
4928 }
4929
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004930 // Don't outline positions.
4931 if (MI.isPosition())
4932 return MachineOutlinerInstrType::Illegal;
4933
Jessica Paquetted36945b2017-08-08 21:51:26 +00004934 // Don't touch the link register or its 32-bit view, W30.
4935 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
4936 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
4937 return MachineOutlinerInstrType::Illegal;
4938
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004939 // Make sure none of the operands are un-outlinable.
Jessica Paquetted36945b2017-08-08 21:51:26 +00004940 for (const MachineOperand &MOP : MI.operands()) {
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004941 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
4942 MOP.isTargetIndex())
4943 return MachineOutlinerInstrType::Illegal;
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004944
4945 // Don't outline anything that uses the link register.
4946 if (MOP.isReg() && getRegisterInfo().regsOverlap(MOP.getReg(), AArch64::LR))
4947 return MachineOutlinerInstrType::Illegal;
Jessica Paquetted36945b2017-08-08 21:51:26 +00004948 }
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004949
4950 // Does this use the stack?
4951 if (MI.modifiesRegister(AArch64::SP, &RI) ||
4952 MI.readsRegister(AArch64::SP, &RI)) {
Jessica Paquette3291e732018-01-09 00:26:18 +00004953 // True if there is no chance that any outlined candidate from this range
4954 // could require stack fixups. That is, both
4955 // * LR is available in the range (No save/restore around call)
4956 // * The range doesn't include calls (No save/restore in outlined frame)
4957 // are true.
4958 bool MightNeedStackFixUp =
4959 (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
4960 MachineOutlinerMBBFlags::HasCalls));
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004961
Jessica Paquette3291e732018-01-09 00:26:18 +00004962 // If this instruction is in a range where it *never* needs to be fixed
4963 // up, then we can *always* outline it. This is true even if it's not
4964 // possible to fix that instruction up.
4965 //
4966 // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
4967 // use SP. Suppose that I1 sits within a range that definitely doesn't
4968 // need stack fixups, while I2 sits in a range that does.
4969 //
4970 // First, I1 can be outlined as long as we *never* fix up the stack in
4971 // any sequence containing it. I1 is already a safe instruction in the
4972 // original program, so as long as we don't modify it we're good to go.
4973 // So this leaves us with showing that outlining I2 won't break our
4974 // program.
4975 //
4976 // Suppose I1 and I2 belong to equivalent candidate sequences. When we
4977 // look at I2, we need to see if it can be fixed up. Suppose I2, (and
4978 // thus I1) cannot be fixed up. Then I2 will be assigned a unique
4979 // integer label; thus, I2 cannot belong to any candidate sequence (a
4980 // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
4981 // as well, so we're good. Thus, I1 is always safe to outline.
4982 //
4983 // This gives us two things: first off, it buys us some more instructions
4984 // for our search space by deeming stack instructions illegal only when
4985 // they can't be fixed up AND we might have to fix them up. Second off,
4986 // This allows us to catch tricky instructions like, say,
4987 // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
4988 // be paired with later SUBXris, which might *not* end up being outlined.
4989 // If we mess with the stack to save something and an ADDXri messes with
4990 // it *after*, then we aren't going to restore the right something from
4991 // the stack if we don't outline the corresponding SUBXri first. ADDXris and
4992 // SUBXris are extremely common in prologue/epilogue code, so supporting
4993 // them in the outliner can be a pretty big win!
4994 if (!MightNeedStackFixUp)
4995 return MachineOutlinerInstrType::Legal;
4996
4997 // At this point, we have a stack instruction that we might need to fix
4998 // up. We'll handle it if it's a load or store.
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004999 if (MI.mayLoadOrStore()) {
Jessica Paquette809d7082017-07-28 03:21:58 +00005000 unsigned Base; // Filled with the base register of MI.
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005001 int64_t Offset; // Filled with the offset of MI.
5002 unsigned DummyWidth;
5003
5004 // Does it allow us to offset the base register and is the base SP?
5005 if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
Jessica Paquette809d7082017-07-28 03:21:58 +00005006 Base != AArch64::SP)
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005007 return MachineOutlinerInstrType::Illegal;
5008
5009 // Find the minimum/maximum offset for this instruction and check if
5010 // fixing it up would be in range.
Jessica Paquette59948662017-12-07 21:51:43 +00005011 int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
Jessica Paquette02c124d2017-12-18 19:33:21 +00005012 unsigned Scale; // The scale to multiply the offsets by.
5013 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005014
5015 // TODO: We should really test what happens if an instruction overflows.
5016 // This is tricky to test with IR tests, but when the outliner is moved
5017 // to a MIR test, it really ought to be checked.
Jessica Paquette59948662017-12-07 21:51:43 +00005018 Offset += 16; // Update the offset to what it would be if we outlined.
5019 if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
Jessica Paquette809d7082017-07-28 03:21:58 +00005020 return MachineOutlinerInstrType::Illegal;
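// Worked example under the assumption that MI is `ldr x0, [sp, #8]`
// (LDRXui: Scale = 8, MinOffset = 0, MaxOffset = 4095): the candidate
// offset becomes 8 + 16 = 24, comfortably inside [0, 4095 * 8], so the
// load remains legal to outline.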
Galina Kistanova9dee3f02017-12-13 15:26:27 +00005021
5022 // It's in range, so we can outline it.
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005023 return MachineOutlinerInstrType::Legal;
5024 }
5025
5026 // We can't fix it up, so don't outline it.
5027 return MachineOutlinerInstrType::Illegal;
5028 }
5029
5030 return MachineOutlinerInstrType::Legal;
5031}
5032
5033void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
5034 for (MachineInstr &MI : MBB) {
5035 unsigned Base, Width;
5036 int64_t Offset;
5037
5038 // Is this a load or store with an immediate offset with SP as the base?
5039 if (!MI.mayLoadOrStore() ||
5040 !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
5041 Base != AArch64::SP)
5042 continue;
5043
5044 // It is, so we have to fix it up.
5045 unsigned Scale;
5046 int64_t Dummy1, Dummy2;
5047
5048 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
5049 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
5050 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
5051 assert(Scale != 0 && "Unexpected opcode!");
5052
5053 // We've pushed the return address to the stack, so add 16 to the offset.
5054 // This is safe, since we already checked if it would overflow when we
5055 // checked if this instruction was legal to outline.
Jessica Paquette809d7082017-07-28 03:21:58 +00005056 int64_t NewImm = (Offset + 16) / Scale;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005057 StackOffsetOperand.setImm(NewImm);
5058 }
5059}
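// A minimal sketch of the fixup, assuming an outlined `str x19, [sp, #8]`
// (STRXui, scale of 8): after the 16-byte LR spill the same slot sits at
// sp + 24, so the immediate is rewritten to (8 + 16) / 8 = 3, i.e.
// `str x19, [sp, #24]`.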
5060
Jessica Paquette4cf187b2017-09-27 20:47:39 +00005061void AArch64InstrInfo::insertOutlinerEpilogue(
5062 MachineBasicBlock &MBB, MachineFunction &MF,
5063 const MachineOutlinerInfo &MInfo) const {
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005064
Jessica Paquette8565d3a2017-12-18 21:44:52 +00005065 // Is there a call in the outlined range?
5066 if (std::any_of(MBB.instr_begin(), MBB.instr_end(),
5067 [](MachineInstr &MI) { return MI.isCall(); })) {
Jessica Paquette02c124d2017-12-18 19:33:21 +00005068 // Fix up the instructions in the range, since we're going to modify the
5069 // stack.
5070 fixupPostOutline(MBB);
5071
5072 // LR has to be a live in so that we can save it.
5073 MBB.addLiveIn(AArch64::LR);
5074
5075 MachineBasicBlock::iterator It = MBB.begin();
5076 MachineBasicBlock::iterator Et = MBB.end();
5077
5078 if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
5079 Et = std::prev(MBB.end());
5080
5081 // Insert a save before the outlined region
5082 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5083 .addReg(AArch64::SP, RegState::Define)
5084 .addReg(AArch64::LR)
5085 .addReg(AArch64::SP)
5086 .addImm(-16);
5087 It = MBB.insert(It, STRXpre);
5088
5089 // Insert a restore before the terminator for the function.
5090 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5091 .addReg(AArch64::SP, RegState::Define)
5092 .addReg(AArch64::LR, RegState::Define)
5093 .addReg(AArch64::SP)
5094 .addImm(16);
5095 Et = MBB.insert(Et, LDRXpost);
5096 }
5097
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005098 // If this is a tail call outlined function, then there's already a return.
Jessica Paquette4cf187b2017-09-27 20:47:39 +00005099 if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005100 return;
5101
5102 // It's not a tail call, so we have to insert the return ourselves.
5103 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
5104 .addReg(AArch64::LR, RegState::Undef);
5105 MBB.insert(MBB.end(), ret);
5106
Jessica Paquette4cf187b2017-09-27 20:47:39 +00005107 // Did we have to modify the stack by saving the link register?
5108 if (MInfo.FrameConstructionID == MachineOutlinerNoLRSave)
5109 return;
5110
5111 // We modified the stack.
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005112 // Walk over the basic block and fix up all the stack accesses.
5113 fixupPostOutline(MBB);
5114}
5115
Jessica Paquette4cf187b2017-09-27 20:47:39 +00005116void AArch64InstrInfo::insertOutlinerPrologue(
5117 MachineBasicBlock &MBB, MachineFunction &MF,
5118 const MachineOutlinerInfo &MInfo) const {}
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005119
5120MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
5121 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
Jessica Paquette4cf187b2017-09-27 20:47:39 +00005122 MachineFunction &MF, const MachineOutlinerInfo &MInfo) const {
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005123
5124 // Are we tail calling?
Jessica Paquette4cf187b2017-09-27 20:47:39 +00005125 if (MInfo.CallConstructionID == MachineOutlinerTailCall) {
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005126 // If yes, then we can just branch to the label.
Jessica Paquetted87f5442017-07-29 02:55:46 +00005127 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::B))
5128 .addGlobalAddress(M.getNamedValue(MF.getName())));
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005129 return It;
5130 }
5131
Jessica Paquette4cf187b2017-09-27 20:47:39 +00005132 // Are we saving the link register?
5133 if (MInfo.CallConstructionID == MachineOutlinerNoLRSave) {
5134 // No, so just insert the call.
5135 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5136 .addGlobalAddress(M.getNamedValue(MF.getName())));
5137 return It;
5138 }
5139
5140 // We have a default call. Save the link register.
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005141 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5142 .addReg(AArch64::SP, RegState::Define)
5143 .addReg(AArch64::LR)
5144 .addReg(AArch64::SP)
5145 .addImm(-16);
5146 It = MBB.insert(It, STRXpre);
5147 It++;
5148
5149 // Insert the call.
Jessica Paquetted87f5442017-07-29 02:55:46 +00005150 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5151 .addGlobalAddress(M.getNamedValue(MF.getName())));
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005152
5153 It++;
5154
5155 // Restore the link register.
5156 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5157 .addReg(AArch64::SP, RegState::Define)
Jessica Paquette6315d2d2017-08-10 23:11:24 +00005158 .addReg(AArch64::LR, RegState::Define)
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005159 .addReg(AArch64::SP)
5160 .addImm(16);
5161 It = MBB.insert(It, LDRXpost);
5162
5163 return It;
Matthew Simpson9439f542017-12-27 15:25:01 +00005164}
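// The default-call path above amounts to emitting, at the call site (the
// outlined function's name here is illustrative):
//
//   str x30, [sp, #-16]!     // STRXpre: spill LR
//   bl  OUTLINED_FUNCTION_0
//   ldr x30, [sp], #16       // LDRXpost: reload LR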