blob: 9afd05f99e92c17f36797cd94ca553e9f8a348ee [file] [log] [blame]
Tim Northover3b0846e2014-05-24 12:50:23 +00001//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the AArch64 implementation of the TargetInstrInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64InstrInfo.h"
Jessica Paquetteea8cc092017-03-17 22:26:55 +000015#include "AArch64MachineFunctionInfo.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000016#include "AArch64Subtarget.h"
17#include "MCTargetDesc/AArch64AddressingModes.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000018#include "Utils/AArch64BaseInfo.h"
19#include "llvm/ADT/ArrayRef.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000020#include "llvm/ADT/STLExtras.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000021#include "llvm/ADT/SmallVector.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000022#include "llvm/CodeGen/MachineBasicBlock.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000023#include "llvm/CodeGen/MachineFrameInfo.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000024#include "llvm/CodeGen/MachineFunction.h"
25#include "llvm/CodeGen/MachineInstr.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000026#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineMemOperand.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000028#include "llvm/CodeGen/MachineOperand.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000029#include "llvm/CodeGen/MachineRegisterInfo.h"
Diana Picus4b972882016-09-13 07:45:17 +000030#include "llvm/CodeGen/StackMaps.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000031#include "llvm/IR/DebugLoc.h"
32#include "llvm/IR/GlobalValue.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000033#include "llvm/MC/MCInst.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000034#include "llvm/MC/MCInstrDesc.h"
35#include "llvm/Support/Casting.h"
36#include "llvm/Support/CodeGen.h"
37#include "llvm/Support/CommandLine.h"
38#include "llvm/Support/Compiler.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000039#include "llvm/Support/ErrorHandling.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000040#include "llvm/Support/MathExtras.h"
41#include "llvm/Target/TargetMachine.h"
42#include "llvm/Target/TargetOptions.h"
43#include "llvm/Target/TargetRegisterInfo.h"
44#include "llvm/Target/TargetSubtargetInfo.h"
45#include <cassert>
46#include <cstdint>
47#include <iterator>
48#include <utility>
Tim Northover3b0846e2014-05-24 12:50:23 +000049
50using namespace llvm;
51
52#define GET_INSTRINFO_CTOR_DTOR
53#include "AArch64GenInstrInfo.inc"
54
Jessica Paquette809d7082017-07-28 03:21:58 +000055static cl::opt<unsigned> TBZDisplacementBits(
56 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
57 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
58
59static cl::opt<unsigned> CBZDisplacementBits(
60 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
61 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
Matt Arsenaulte8da1452016-08-02 08:06:17 +000062
63static cl::opt<unsigned>
Jessica Paquette809d7082017-07-28 03:21:58 +000064 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
65 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
Matt Arsenaulte8da1452016-08-02 08:06:17 +000066
// Construct the target instruction info: registers the call-frame setup /
// destroy pseudo opcodes with the generated base class and seeds the
// register info from the subtarget's triple.
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}
Tim Northover3b0846e2014-05-24 12:50:23 +000070
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  // Inline assembly size is estimated from the asm string by the
  // MCAsmInfo-aware helper.
  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  // before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    // Pure bookkeeping pseudos emit no machine code.
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes
    NumBytes = 16;
    break;
  }

  return NumBytes;
}
114
115static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
116 SmallVectorImpl<MachineOperand> &Cond) {
117 // Block ends with fall-through condbranch.
118 switch (LastInst->getOpcode()) {
119 default:
120 llvm_unreachable("Unknown branch instruction?");
121 case AArch64::Bcc:
122 Target = LastInst->getOperand(1).getMBB();
123 Cond.push_back(LastInst->getOperand(0));
124 break;
125 case AArch64::CBZW:
126 case AArch64::CBZX:
127 case AArch64::CBNZW:
128 case AArch64::CBNZX:
129 Target = LastInst->getOperand(1).getMBB();
130 Cond.push_back(MachineOperand::CreateImm(-1));
131 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
132 Cond.push_back(LastInst->getOperand(0));
133 break;
134 case AArch64::TBZW:
135 case AArch64::TBZX:
136 case AArch64::TBNZW:
137 case AArch64::TBNZX:
138 Target = LastInst->getOperand(2).getMBB();
139 Cond.push_back(MachineOperand::CreateImm(-1));
140 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
141 Cond.push_back(LastInst->getOperand(0));
142 Cond.push_back(LastInst->getOperand(1));
143 }
144}
145
Matt Arsenaulte8da1452016-08-02 08:06:17 +0000146static unsigned getBranchDisplacementBits(unsigned Opc) {
147 switch (Opc) {
148 default:
149 llvm_unreachable("unexpected opcode!");
Matt Arsenault0a3ea892016-10-06 15:38:09 +0000150 case AArch64::B:
151 return 64;
Matt Arsenaulte8da1452016-08-02 08:06:17 +0000152 case AArch64::TBNZW:
153 case AArch64::TBZW:
154 case AArch64::TBNZX:
155 case AArch64::TBZX:
156 return TBZDisplacementBits;
157 case AArch64::CBNZW:
158 case AArch64::CBZW:
159 case AArch64::CBNZX:
160 case AArch64::CBZX:
161 return CBZDisplacementBits;
162 case AArch64::Bcc:
163 return BCCDisplacementBits;
164 }
165}
166
Matt Arsenault0a3ea892016-10-06 15:38:09 +0000167bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
168 int64_t BrOffset) const {
169 unsigned Bits = getBranchDisplacementBits(BranchOp);
170 assert(Bits >= 3 && "max branch displacement must be enough to jump"
171 "over conditional branch expansion");
172 return isIntN(Bits, BrOffset / 4);
Matt Arsenaulte8da1452016-08-02 08:06:17 +0000173}
174
Jessica Paquette809d7082017-07-28 03:21:58 +0000175MachineBasicBlock *
176AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
Matt Arsenault0a3ea892016-10-06 15:38:09 +0000177 switch (MI.getOpcode()) {
178 default:
179 llvm_unreachable("unexpected opcode!");
180 case AArch64::B:
181 return MI.getOperand(0).getMBB();
182 case AArch64::TBZW:
183 case AArch64::TBNZW:
184 case AArch64::TBZX:
185 case AArch64::TBNZX:
186 return MI.getOperand(2).getMBB();
187 case AArch64::CBZW:
188 case AArch64::CBNZW:
189 case AArch64::CBZX:
190 case AArch64::CBNZX:
191 case AArch64::Bcc:
192 return MI.getOperand(1).getMBB();
193 }
Matt Arsenaulte8da1452016-08-02 08:06:17 +0000194}
195
// Branch analysis.
//
// Classifies the terminators of MBB per the TargetInstrInfo contract:
// returns false (success) with TBB/FBB/Cond filled in for the patterns it
// understands, true when the block's control flow cannot be analyzed.
// With AllowModify set, redundant trailing branches may be erased.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      // Single unconditional branch: TBB is its target.
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
283
Matt Arsenault1b9fc8e2016-09-14 20:43:16 +0000284bool AArch64InstrInfo::reverseBranchCondition(
Tim Northover3b0846e2014-05-24 12:50:23 +0000285 SmallVectorImpl<MachineOperand> &Cond) const {
286 if (Cond[0].getImm() != -1) {
287 // Regular Bcc
288 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
289 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
290 } else {
291 // Folded compare-and-branch
292 switch (Cond[1].getImm()) {
293 default:
294 llvm_unreachable("Unknown conditional branch!");
295 case AArch64::CBZW:
296 Cond[1].setImm(AArch64::CBNZW);
297 break;
298 case AArch64::CBNZW:
299 Cond[1].setImm(AArch64::CBZW);
300 break;
301 case AArch64::CBZX:
302 Cond[1].setImm(AArch64::CBNZX);
303 break;
304 case AArch64::CBNZX:
305 Cond[1].setImm(AArch64::CBZX);
306 break;
307 case AArch64::TBZW:
308 Cond[1].setImm(AArch64::TBNZW);
309 break;
310 case AArch64::TBNZW:
311 Cond[1].setImm(AArch64::TBZW);
312 break;
313 case AArch64::TBZX:
314 Cond[1].setImm(AArch64::TBNZX);
315 break;
316 case AArch64::TBNZX:
317 Cond[1].setImm(AArch64::TBZX);
318 break;
319 }
320 }
321
322 return false;
323}
324
Matt Arsenault1b9fc8e2016-09-14 20:43:16 +0000325unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000326 int *BytesRemoved) const {
Benjamin Kramere61cbd12015-06-25 13:28:24 +0000327 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
328 if (I == MBB.end())
Tim Northover3b0846e2014-05-24 12:50:23 +0000329 return 0;
Benjamin Kramere61cbd12015-06-25 13:28:24 +0000330
Tim Northover3b0846e2014-05-24 12:50:23 +0000331 if (!isUncondBranchOpcode(I->getOpcode()) &&
332 !isCondBranchOpcode(I->getOpcode()))
333 return 0;
334
335 // Remove the branch.
336 I->eraseFromParent();
337
338 I = MBB.end();
339
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000340 if (I == MBB.begin()) {
341 if (BytesRemoved)
342 *BytesRemoved = 4;
Tim Northover3b0846e2014-05-24 12:50:23 +0000343 return 1;
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000344 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000345 --I;
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000346 if (!isCondBranchOpcode(I->getOpcode())) {
347 if (BytesRemoved)
348 *BytesRemoved = 4;
Tim Northover3b0846e2014-05-24 12:50:23 +0000349 return 1;
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000350 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000351
352 // Remove the branch.
353 I->eraseFromParent();
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000354 if (BytesRemoved)
355 *BytesRemoved = 8;
356
Tim Northover3b0846e2014-05-24 12:50:23 +0000357 return 2;
358}
359
360void AArch64InstrInfo::instantiateCondBranch(
Benjamin Kramerbdc49562016-06-12 15:39:02 +0000361 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
Ahmed Bougachac88bf542015-06-11 19:30:37 +0000362 ArrayRef<MachineOperand> Cond) const {
Tim Northover3b0846e2014-05-24 12:50:23 +0000363 if (Cond[0].getImm() != -1) {
364 // Regular Bcc
365 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
366 } else {
367 // Folded compare-and-branch
Ahmed Bougacha72001cf2014-11-07 02:50:00 +0000368 // Note that we use addOperand instead of addReg to keep the flags.
Tim Northover3b0846e2014-05-24 12:50:23 +0000369 const MachineInstrBuilder MIB =
Diana Picus116bbab2017-01-13 09:58:52 +0000370 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
Tim Northover3b0846e2014-05-24 12:50:23 +0000371 if (Cond.size() > 3)
372 MIB.addImm(Cond[3].getImm());
373 MIB.addMBB(TBB);
374 }
375}
376
Jessica Paquette809d7082017-07-28 03:21:58 +0000377unsigned AArch64InstrInfo::insertBranch(
378 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
379 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
Tim Northover3b0846e2014-05-24 12:50:23 +0000380 // Shouldn't be a fall through.
Matt Arsenaulte8e0f5c2016-09-14 17:24:15 +0000381 assert(TBB && "insertBranch must not be told to insert a fallthrough");
Tim Northover3b0846e2014-05-24 12:50:23 +0000382
383 if (!FBB) {
384 if (Cond.empty()) // Unconditional branch?
385 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
386 else
387 instantiateCondBranch(MBB, DL, TBB, Cond);
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000388
389 if (BytesAdded)
390 *BytesAdded = 4;
391
Tim Northover3b0846e2014-05-24 12:50:23 +0000392 return 1;
393 }
394
395 // Two-way conditional branch.
396 instantiateCondBranch(MBB, DL, TBB, Cond);
397 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000398
399 if (BytesAdded)
400 *BytesAdded = 8;
401
Tim Northover3b0846e2014-05-24 12:50:23 +0000402 return 2;
403}
404
405// Find the original register that VReg is copied from.
406static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
407 while (TargetRegisterInfo::isVirtualRegister(VReg)) {
408 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
409 if (!DefMI->isFullCopy())
410 return VReg;
411 VReg = DefMI->getOperand(1).getReg();
412 }
413 return VReg;
414}
415
// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  // Index of DefMI's operand that becomes the csel's folded source.
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    // (findRegisterDefOperandIdx with isDead=true returns -1 when there is
    // no *dead* NZCV def, i.e. the flags are live — presumably read later.)
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}
484
Jessica Paquette809d7082017-07-28 03:21:58 +0000485bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
486 ArrayRef<MachineOperand> Cond,
487 unsigned TrueReg, unsigned FalseReg,
488 int &CondCycles, int &TrueCycles,
489 int &FalseCycles) const {
Tim Northover3b0846e2014-05-24 12:50:23 +0000490 // Check register classes.
491 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
492 const TargetRegisterClass *RC =
Eric Christophera0de2532015-03-18 20:37:30 +0000493 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
Tim Northover3b0846e2014-05-24 12:50:23 +0000494 if (!RC)
495 return false;
496
497 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
498 unsigned ExtraCondLat = Cond.size() != 1;
499
500 // GPRs are handled by csel.
501 // FIXME: Fold in x+1, -x, and ~x when applicable.
502 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
503 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
504 // Single-cycle csel, csinc, csinv, and csneg.
505 CondCycles = 1 + ExtraCondLat;
506 TrueCycles = FalseCycles = 1;
507 if (canFoldIntoCSel(MRI, TrueReg))
508 TrueCycles = 0;
509 else if (canFoldIntoCSel(MRI, FalseReg))
510 FalseCycles = 0;
511 return true;
512 }
513
514 // Scalar floating point is handled by fcsel.
515 // FIXME: Form fabs, fmin, and fmax when applicable.
516 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
517 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
518 CondCycles = 5 + ExtraCondLat;
519 TrueCycles = FalseCycles = 2;
520 return true;
521 }
522
523 // Can't do vectors.
524 return false;
525}
526
// Lower a select into a compare (when needed) plus a csel/fcsel at the
// insertion point I, writing the result into DstReg. Cond uses the encoding
// produced by parseCondBranch(): size 1 = b.cc, 3 = cbz/cbnz, 4 = tbz/tbnz.
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  // Select csel vs fcsel from the destination register class; folding of
  // simple defining instructions only applies to the GPR forms.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual register into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}
662
Lawrence Hu687097a2015-07-23 23:55:28 +0000663/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000664static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
665 uint64_t Imm = MI.getOperand(1).getImm();
Weiming Zhaob33a5552015-07-23 19:24:53 +0000666 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
667 uint64_t Encoding;
668 return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
669}
670
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  // Without subtarget-specific handling, defer to the generic TableGen flag.
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  unsigned Imm;

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    // Cheap on ExynosM1 unconditionally, elsewhere only when the shift
    // operand (operand 3) is zero.
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
            MI.getOperand(3).getImm() == 0);

  // add/sub on register with shift
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    // ExynosM1 only, and only for small arithmetic shift amounts (< 4).
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getArithShiftValue(Imm) < 4);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // logical ops on register with shift
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    Imm = MI.getOperand(3).getImm();
    // ExynosM1 only, and only for a short LSL (shift amount < 4).
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getShiftValue(Imm) < 4 &&
            AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
  // feature.
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    // A COPY from WZR/XZR is a zeroing move, free under zero-cycle zeroing.
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
762
/// Return true on Falkor when the shift/extend/offset modifier carried by
/// \p MI is cheap enough that the instruction executes fast.
///
/// The modifier is always encoded in operand 3 of the instructions handled
/// here. Anything not listed is conservatively reported as not fast.
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    return false;

  // Shifted-register ADD: fast with no shift, or with an LSL of at most 5.
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  // Extended-register ADD: fast for unsigned extends with a left shift of
  // at most 4; signed extends are not fast.
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  // Shifted-register SUB (32-bit): fast with no shift or with ASR #31
  // (i.e. extracting the sign bit).
  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  // Shifted-register SUB (64-bit): fast with no shift or with ASR #63.
  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  // Extended-register SUB: fast only for unsigned extends with no extra
  // shift at all.
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  // Register-offset loads/stores/prefetch: operand 3 holds the
  // sign-extension flag of the offset register; only the unsigned
  // (non-sign-extended) addressing form is fast.
  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}
884
Tim Northover3b0846e2014-05-24 12:50:23 +0000885bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
886 unsigned &SrcReg, unsigned &DstReg,
887 unsigned &SubIdx) const {
888 switch (MI.getOpcode()) {
889 default:
890 return false;
891 case AArch64::SBFMXri: // aka sxtw
892 case AArch64::UBFMXri: // aka uxtw
893 // Check for the 32 -> 64 bit extension case, these instructions can do
894 // much more.
895 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
896 return false;
897 // This is a signed or unsigned 32 -> 64 bit extension.
898 SrcReg = MI.getOperand(1).getReg();
899 DstReg = MI.getOperand(0).getReg();
900 SubIdx = AArch64::sub_32;
901 return true;
902 }
903}
904
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000905bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
906 MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
Eric Christophera0de2532015-03-18 20:37:30 +0000907 const TargetRegisterInfo *TRI = &getRegisterInfo();
Chad Rosier3528c1e2014-09-08 14:43:48 +0000908 unsigned BaseRegA = 0, BaseRegB = 0;
Chad Rosier0da267d2016-03-09 16:46:48 +0000909 int64_t OffsetA = 0, OffsetB = 0;
910 unsigned WidthA = 0, WidthB = 0;
Chad Rosier3528c1e2014-09-08 14:43:48 +0000911
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000912 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
913 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
Chad Rosier3528c1e2014-09-08 14:43:48 +0000914
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000915 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
916 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
Chad Rosier3528c1e2014-09-08 14:43:48 +0000917 return false;
918
919 // Retrieve the base register, offset from the base register and width. Width
920 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
921 // base registers are identical, and the offset of a lower memory access +
922 // the width doesn't overlap the offset of a higher memory access,
923 // then the memory accesses are different.
Sanjoy Dasb666ea32015-06-15 18:44:14 +0000924 if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
925 getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
Chad Rosier3528c1e2014-09-08 14:43:48 +0000926 if (BaseRegA == BaseRegB) {
927 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
928 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
929 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
930 if (LowOffset + LowWidth <= HighOffset)
931 return true;
932 }
933 }
934 return false;
935}
936
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
///
/// Note: for the immediate forms below, CmpValue is deliberately collapsed to
/// 0 or 1 (see the FIXMEs) because optimizeCompareInstr only ever compares it
/// against zero.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI.getOpcode()) {
  default:
    break;
  // Register-register forms: both sources are registers, no immediate.
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  // Register-immediate forms: second source is an immediate.
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: In order to convert CmpValue to 0 or 1
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the others xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk
    // CmpValue is only used to compare with zero in OptimizeCompareInstr
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}
994
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000995static bool UpdateOperandRegClass(MachineInstr &Instr) {
996 MachineBasicBlock *MBB = Instr.getParent();
Tim Northover3b0846e2014-05-24 12:50:23 +0000997 assert(MBB && "Can't get MachineBasicBlock here");
998 MachineFunction *MF = MBB->getParent();
999 assert(MF && "Can't get MachineFunction here");
Eric Christopher6c901622015-01-28 03:51:33 +00001000 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1001 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00001002 MachineRegisterInfo *MRI = &MF->getRegInfo();
1003
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001004 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
Tim Northover3b0846e2014-05-24 12:50:23 +00001005 ++OpIdx) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001006 MachineOperand &MO = Instr.getOperand(OpIdx);
Tim Northover3b0846e2014-05-24 12:50:23 +00001007 const TargetRegisterClass *OpRegCstraints =
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001008 Instr.getRegClassConstraint(OpIdx, TII, TRI);
Tim Northover3b0846e2014-05-24 12:50:23 +00001009
1010 // If there's no constraint, there's nothing to do.
1011 if (!OpRegCstraints)
1012 continue;
1013 // If the operand is a frame index, there's nothing to do here.
1014 // A frame index operand will resolve correctly during PEI.
1015 if (MO.isFI())
1016 continue;
1017
1018 assert(MO.isReg() &&
1019 "Operand has register constraints without being a register!");
1020
1021 unsigned Reg = MO.getReg();
1022 if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
1023 if (!OpRegCstraints->contains(Reg))
1024 return false;
1025 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1026 !MRI->constrainRegClass(Reg, OpRegCstraints))
1027 return false;
1028 }
1029
1030 return true;
1031}
1032
/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible to do the actual
/// substitution and legality checking.
///
/// Forms whose zero-register encoding would become the sp register keep the
/// flag-setting opcode when \p MI defines WZR/XZR, to avoid changing the
/// destination's meaning.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}
Tim Northover3b0846e2014-05-24 12:50:23 +00001080
Jessica Paquette809d7082017-07-28 03:21:58 +00001081enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001082
1083/// True when condition flags are accessed (either by writing or reading)
1084/// on the instruction trace starting at From and ending at To.
1085///
1086/// Note: If From and To are from different blocks it's assumed CC are accessed
1087/// on the path.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001088static bool areCFlagsAccessedBetweenInstrs(
1089 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1090 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00001091 // Early exit if To is at the beginning of the BB.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001092 if (To == To->getParent()->begin())
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00001093 return true;
1094
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001095 // Check whether the instructions are in the same basic block
1096 // If not, assume the condition flags might get modified somewhere.
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00001097 if (To->getParent() != From->getParent())
1098 return true;
1099
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001100 // From must be above To.
Duncan P. N. Exon Smith18720962016-09-11 18:51:28 +00001101 assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1102 [From](MachineInstr &MI) {
1103 return MI.getIterator() == From;
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001104 }) != To->getParent()->rend());
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001105
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001106 // We iterate backward starting \p To until we hit \p From.
1107 for (--To; To != From; --To) {
1108 const MachineInstr &Instr = *To;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00001109
Jessica Paquette809d7082017-07-28 03:21:58 +00001110 if (((AccessToCheck & AK_Write) &&
1111 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1112 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00001113 return true;
1114 }
1115 return false;
1116}
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001117
1118/// Try to optimize a compare instruction. A compare instruction is an
Jessica Paquette809d7082017-07-28 03:21:58 +00001119/// instruction which produces AArch64::NZCV. It can be truly compare
1120/// instruction
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001121/// when there are no uses of its destination register.
1122///
1123/// The following steps are tried in order:
1124/// 1. Convert CmpInstr into an unconditional version.
1125/// 2. Remove CmpInstr if above there is an instruction producing a needed
Jessica Paquette809d7082017-07-28 03:21:58 +00001126/// condition code or an instruction which can be converted into such an
1127/// instruction.
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001128/// Only comparison with zero is supported.
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00001129bool AArch64InstrInfo::optimizeCompareInstr(
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001130 MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00001131 int CmpValue, const MachineRegisterInfo *MRI) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001132 assert(CmpInstr.getParent());
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001133 assert(MRI);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00001134
1135 // Replace SUBSWrr with SUBWrr if NZCV is not used.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001136 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001137 if (DeadNZCVIdx != -1) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001138 if (CmpInstr.definesRegister(AArch64::WZR) ||
1139 CmpInstr.definesRegister(AArch64::XZR)) {
1140 CmpInstr.eraseFromParent();
Juergen Ributzka7a7c4682014-11-18 21:02:40 +00001141 return true;
1142 }
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001143 unsigned Opc = CmpInstr.getOpcode();
Chad Rosier6db9ff62017-06-23 19:20:12 +00001144 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00001145 if (NewOpc == Opc)
1146 return false;
Tim Northover3b0846e2014-05-24 12:50:23 +00001147 const MCInstrDesc &MCID = get(NewOpc);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001148 CmpInstr.setDesc(MCID);
1149 CmpInstr.RemoveOperand(DeadNZCVIdx);
Tim Northover3b0846e2014-05-24 12:50:23 +00001150 bool succeeded = UpdateOperandRegClass(CmpInstr);
1151 (void)succeeded;
1152 assert(succeeded && "Some operands reg class are incompatible!");
1153 return true;
1154 }
1155
1156 // Continue only if we have a "ri" where immediate is zero.
Jiangning Liudcc651f2014-08-08 14:19:29 +00001157 // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
1158 // function.
1159 assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
Tim Northover3b0846e2014-05-24 12:50:23 +00001160 if (CmpValue != 0 || SrcReg2 != 0)
1161 return false;
1162
1163 // CmpInstr is a Compare instruction if destination register is not used.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001164 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
Tim Northover3b0846e2014-05-24 12:50:23 +00001165 return false;
1166
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001167 return substituteCmpToZero(CmpInstr, SrcReg, MRI);
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001168}
Tim Northover3b0846e2014-05-24 12:50:23 +00001169
/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  // Already the flag-setting form: return the opcode unchanged.
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  // Non-flag-setting form: map to the corresponding S opcode.
  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}
1219
1220/// Check if AArch64::NZCV should be alive in successors of MBB.
1221static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1222 for (auto *BB : MBB->successors())
1223 if (BB->isLiveIn(AArch64::NZCV))
1224 return true;
1225 return false;
1226}
1227
namespace {

/// Tracks which of the N, Z, C and V condition flags are used.
struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  /// Accumulate another flag set into this one (set union).
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001248
/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    // The condition-code immediate sits two operands before the NZCV use.
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    // For conditional selects the condition-code immediate immediately
    // precedes the NZCV use operand.
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}
1279
/// Return the set of NZCV flags that condition code \p CC reads.
/// Unlisted condition codes (AL/NV) read no flags.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}
1323
1324static bool isADDSRegImm(unsigned Opcode) {
1325 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1326}
1327
1328static bool isSUBSRegImm(unsigned Opcode) {
1329 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1330}
1331
1332/// Check if CmpInstr can be substituted by MI.
1333///
1334/// CmpInstr can be substituted:
1335/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1336/// - and, MI and CmpInstr are from the same MachineBB
1337/// - and, condition flags are not alive in successors of the CmpInstr parent
1338/// - and, if MI opcode is the S form there must be no defs of flags between
1339/// MI and CmpInstr
1340/// or if MI opcode is not the S form there must be neither defs of flags
1341/// nor uses of flags between MI and CmpInstr.
1342/// - and C/V flags are not used after CmpInstr
1343static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
Jessica Paquette809d7082017-07-28 03:21:58 +00001344 const TargetRegisterInfo *TRI) {
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001345 assert(MI);
1346 assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1347 assert(CmpInstr);
1348
1349 const unsigned CmpOpcode = CmpInstr->getOpcode();
1350 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1351 return false;
1352
1353 if (MI->getParent() != CmpInstr->getParent())
1354 return false;
1355
1356 if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1357 return false;
1358
1359 AccessKind AccessToCheck = AK_Write;
1360 if (sForm(*MI) != MI->getOpcode())
1361 AccessToCheck = AK_All;
1362 if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1363 return false;
1364
1365 UsedNZCV NZCVUsedAfterCmp;
Jessica Paquette809d7082017-07-28 03:21:58 +00001366 for (auto I = std::next(CmpInstr->getIterator()),
1367 E = CmpInstr->getParent()->instr_end();
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001368 I != E; ++I) {
1369 const MachineInstr &Instr = *I;
1370 if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1371 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1372 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1373 return false;
1374 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1375 }
1376
1377 if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1378 break;
1379 }
Jessica Paquette809d7082017-07-28 03:21:58 +00001380
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001381 return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1382}
1383
1384/// Substitute an instruction comparing to zero with another instruction
1385/// which produces needed condition flags.
1386///
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001387/// Return true on success.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001388bool AArch64InstrInfo::substituteCmpToZero(
1389 MachineInstr &CmpInstr, unsigned SrcReg,
1390 const MachineRegisterInfo *MRI) const {
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001391 assert(MRI);
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001392 // Get the unique definition of SrcReg.
1393 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1394 if (!MI)
1395 return false;
1396
1397 const TargetRegisterInfo *TRI = &getRegisterInfo();
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001398
1399 unsigned NewOpc = sForm(*MI);
1400 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1401 return false;
Tim Northover3b0846e2014-05-24 12:50:23 +00001402
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001403 if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00001404 return false;
Tim Northover3b0846e2014-05-24 12:50:23 +00001405
1406 // Update the instruction to set NZCV.
1407 MI->setDesc(get(NewOpc));
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001408 CmpInstr.eraseFromParent();
1409 bool succeeded = UpdateOperandRegClass(*MI);
Tim Northover3b0846e2014-05-24 12:50:23 +00001410 (void)succeeded;
1411 assert(succeeded && "Some operands reg class are incompatible!");
1412 MI->addRegisterDefined(AArch64::NZCV, TRI);
1413 return true;
1414}
1415
/// Expand the LOAD_STACK_GUARD pseudo into a real address-materialization +
/// load sequence appropriate for how the stack-guard global is referenced
/// (GOT, large code model, or page-relative). Returns false for any other
/// opcode.
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  // The stack-guard global is attached to the pseudo as its memory operand.
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    // GOT access: load the address from the GOT, then load through it.
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    // Large code model: materialize the full 64-bit address with
    // MOVZ + three MOVKs (16 bits per chunk), then load through it.
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    // Default: page-relative ADRP + LDR with the page offset folded into the
    // load's immediate field.
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  // The pseudo has been fully replaced by the sequence above.
  MBB.erase(MI);

  return true;
}
1470
/// Return true if this is a shifted-register instruction whose shift amount
/// (carried in operand 3) is non-zero.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    // Operand 3 encodes the shift; any non-zero value means a real shift.
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}
1508
/// Return true if this instruction is an extended-register form ("rx" suffix)
/// whose extend/shift immediate (operand 3) is non-zero.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    // For the "rx" variants operand 3 encodes the extend kind and shift.
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}
1535
// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      // MOVZi with a zero immediate must also carry a zero shift.
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    // NOTE(review): only copies from WZR are recognized here; a COPY from XZR
    // is not treated as a zeroing instruction — confirm this is intentional.
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}
1559
// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      // A zero shift amount is required for this to be a pure rename.
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}
1589
// Return true if this instruction simply renames a floating-point/vector
// register without modifying bits. (Counterpart of isGPRCopy for FPR copies.)
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    // orr Vd.16b, Vn.16b, Vn.16b with identical sources is a register move.
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}
1612
/// TargetInstrInfo hook: if \p MI is a direct, whole-register load from a
/// stack slot (frame-index base, zero immediate offset), set \p FrameIndex to
/// the slot and return the destination register; otherwise return 0.
unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    // Only a full-register load (no subreg) at offset 0 from a frame index
    // qualifies as a simple stack-slot load.
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}
1635
/// TargetInstrInfo hook: if \p MI is a direct, whole-register store to a
/// stack slot (frame-index base, zero immediate offset), set \p FrameIndex to
/// the slot and return the stored register; otherwise return 0.
unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    // Mirror of isLoadFromStackSlot: full-register store at offset 0 from a
    // frame index.
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}
1657
/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  // All register-offset ("ro") load/store forms, with either a W or an X
  // index register. Operand 3 holds the packed extend/shift encoding.
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    // Plain "[base, Xm]" addressing is UXTX with no shift; anything else
    // (a non-UXTX extend or a do-shift bit) means the index is transformed.
    unsigned Val = MI.getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  return false;
}
1718
1719/// Check all MachineMemOperands for a hint to suppress pairing.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001720bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
Eugene Zelenko049b0172017-01-06 00:30:53 +00001721 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
Justin Lebar288b3372016-07-14 18:15:20 +00001722 return MMO->getFlags() & MOSuppressPair;
1723 });
Tim Northover3b0846e2014-05-24 12:50:23 +00001724}
1725
1726/// Set a flag on the first MachineMemOperand to suppress pairing.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001727void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
1728 if (MI.memoperands_empty())
Tim Northover3b0846e2014-05-24 12:50:23 +00001729 return;
Justin Lebar288b3372016-07-14 18:15:20 +00001730 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
Tim Northover3b0846e2014-05-24 12:50:23 +00001731}
1732
Geoff Berryb1e87142017-07-14 21:44:12 +00001733/// Check all MachineMemOperands for a hint that the load/store is strided.
1734bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) const {
1735 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1736 return MMO->getFlags() & MOStridedAccess;
1737 });
1738}
1739
/// \returns true if \p Opc is one of the unscaled-immediate load/store
/// opcodes (the LDUR*/STUR* family).
bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}
1764
/// Convenience overload: classify \p MI by delegating to the opcode-based
/// isUnscaledLdSt.
bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
  return isUnscaledLdSt(MI.getOpcode());
}
1768
// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
// Precondition (asserted below): operand 1 of \p MI is the base register.
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm (as opposed to an address reloc).
  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
  if (!MI.getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  unsigned BaseReg = MI.getOperand(1).getReg();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI.modifiesRegister(BaseReg, TRI))
    return false;

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.isPaired128Slow()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}
1808
/// TargetInstrInfo hook: decompose \p LdSt into base register and byte
/// offset. Thin wrapper that discards the access width computed by
/// getMemOpBaseRegImmOfsWidth.
bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    const TargetRegisterInfo *TRI) const {
  unsigned Width;
  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
}
1815
/// Decompose \p LdSt into base register, byte offset, and access width.
/// Handles both non-paired (3 explicit operands) and paired (4 explicit
/// operands) reg+imm forms; returns false for anything else (e.g. relocs).
bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Get the scaling factor for the instruction and set the width for the
  // instruction.
  unsigned Scale = 0;
  int64_t Dummy1, Dummy2;

  // If this returns false, then it's an instruction we don't want to handle.
  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    return false;

  // Compute the offset. Offset is calculated as the immediate operand
  // multiplied by the scaling factor. Unscaled instructions have scaling factor
  // set to 1.
  if (LdSt.getNumExplicitOperands() == 3) {
    BaseReg = LdSt.getOperand(1).getReg();
    Offset = LdSt.getOperand(2).getImm() * Scale;
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseReg = LdSt.getOperand(2).getReg();
    Offset = LdSt.getOperand(3).getImm() * Scale;
  }
  return true;
}
1855
/// Return a mutable reference to the immediate offset operand of \p LdSt,
/// which is its last explicit operand for both non-paired and paired forms.
MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
  return OfsOp;
}
1863
/// Table of per-opcode memory-operation properties.
/// \param [out] Scale     multiplier applied to the encoded immediate to get a
///                        byte offset (1 for the unscaled LDUR/STUR forms).
/// \param [out] Width     bytes accessed by the instruction.
/// \param [out] MinOffset / MaxOffset  legal range of the *encoded* immediate
///                        (pre-scaling).
/// \returns false (with all outputs zeroed) for opcodes not handled here.
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
                                    unsigned &Width, int64_t &MinOffset,
                                    int64_t &MaxOffset) const {
  switch (Opcode) {
  // Not a memory operation or something we want to handle.
  default:
    Scale = Width = 0;
    MinOffset = MaxOffset = 0;
    return false;
  case AArch64::STRWpost:
  case AArch64::LDRWpost:
    // NOTE(review): Width = 32 looks inconsistent with the 4-byte access size
    // of a W-register post-indexed load/store — confirm against the pattern
    // elsewhere in this table (other entries use Width == access bytes).
    Width = 32;
    Scale = 4;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  // Unscaled forms: 9-bit signed byte offset (encoded range [-256, 255]).
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  // Paired forms: 7-bit signed scaled offset (encoded range [-64, 63]);
  // Width covers both registers of the pair.
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    Scale = 16;
    Width = 32;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  // Scaled unsigned-immediate forms: 12-bit unsigned scaled offset
  // (encoded range [0, 4095]).
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    Scale = 8;
    Width = 16;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    Scale = Width = 8;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    Scale = 4;
    Width = 8;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = Width = 4;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  }

  return true;
}
2006
// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled. On success, \p Offset is converted in place from a byte offset to
// an element offset as used by the scaled pair load/store instructions.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
  unsigned OffsetStride = 1;
  switch (Opc) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    OffsetStride = 16;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    OffsetStride = 8;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    OffsetStride = 4;
    break;
  }
  // If the byte-offset isn't a multiple of the stride, we can't scale this
  // offset.
  if (Offset % OffsetStride != 0)
    return false;

  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  Offset /= OffsetStride;
  return true;
}
2042
/// \returns true if loads/stores with opcodes \p FirstOpc and \p SecondOpc
/// may form a pair: either identical opcodes, or a 32-bit zero-extending load
/// combined with the matching sign-extending one (same scaled/unscaled kind).
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
  if (FirstOpc == SecondOpc)
    return true;
  // We can also pair sign-ext and zero-ext instructions.
  switch (FirstOpc) {
  default:
    return false;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
  }
  // These instructions can't be paired based on their opcodes.
  return false;
}
2060
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
                                           MachineInstr &SecondLdSt,
                                           unsigned NumLoads) const {
  // Only cluster up to a single pair.
  if (NumLoads > 1)
    return false;

  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
    return false;

  // Can we pair these instructions based on their opcodes?
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  if (!canPairLdStOpc(FirstOpc, SecondOpc))
    return false;

  // Can't merge volatiles or load/stores that have a hint to avoid pair
  // formation, for example.
  if (!isCandidateToMergeOrPair(FirstLdSt) ||
      !isCandidateToMergeOrPair(SecondLdSt))
    return false;

  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
  // For unscaled opcodes, convert byte offsets to element offsets first.
  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    return false;

  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    return false;

  // Pairwise instructions have a 7-bit signed offset field.
  // Only Offset1 is checked: the final condition forces Offset2 == Offset1+1,
  // so NOTE(review): Offset1 == 63 would accept Offset2 == 64 — confirm that
  // the pair's encoded offset is Offset1 and therefore still in range.
  if (Offset1 > 63 || Offset1 < -64)
    return false;

  // The caller should already have ordered First/SecondLdSt by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  return Offset1 + 1 == Offset2;
}
2103
Tim Northover3b0846e2014-05-24 12:50:23 +00002104static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2105 unsigned Reg, unsigned SubIdx,
2106 unsigned State,
2107 const TargetRegisterInfo *TRI) {
2108 if (!SubIdx)
2109 return MIB.addReg(Reg, State);
2110
2111 if (TargetRegisterInfo::isPhysicalRegister(Reg))
2112 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2113 return MIB.addReg(Reg, State, SubIdx);
2114}
2115
/// Decide whether copying an N-register tuple in ascending order would
/// overwrite part of the source before it has been read. Register encodings
/// wrap mod 32, so the forward distance from source to destination is the
/// masked difference; a distance smaller than the tuple size means overlap.
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  unsigned ForwardDistance = (DestReg - SrcReg) & 0x1f;
  return ForwardDistance < NumRegs;
}
2122
/// Copy a tuple of registers sub-register by sub-register, emitting one
/// \p Opcode instruction per index in \p Indices. The copy direction is
/// reversed when a forward copy would clobber overlapping source registers.
void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I,
                                        const DebugLoc &DL, unsigned DestReg,
                                        unsigned SrcReg, bool KillSrc,
                                        unsigned Opcode,
                                        ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  // If the destination overlaps the source in the ascending direction, copy
  // the sub-registers highest-first so no source element is overwritten
  // before it is read.
  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    // The source sub-register is added twice; presumably \p Opcode is a
    // two-source move-style instruction (e.g. vector ORR) — confirm with
    // the callers that pass Opcode in.
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}
2149
// Emit a copy of physical register SrcReg into DestReg, choosing the copy
// idiom from the register classes involved:
//  - GPR32/GPR64 copies use ORR with the zero register, or ADD #0 when
//    WSP/SP is an operand (ORR cannot address the stack pointer).
//  - D-/Q-register tuples (DD..DDDD, QQ..QQQQ) are copied one sub-register
//    at a time through copyPhysRegTuple.
//  - Narrow FP registers are widened to a super-register so a single
//    ORRv16i8 (with NEON) or FMOVSr can perform the copy.
//  - NZCV is moved through MSR/MRS with an explicit implicit-operand.
// Any class combination not handled below hits llvm_unreachable.
// KillSrc, when set, marks the source operand as killed by the copy.
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, unsigned DestReg,
                                   unsigned SrcReg, bool KillSrc) const {
  if (AArch64::GPR32spRegClass.contains(DestReg) &&
      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    const TargetRegisterInfo *TRI = &getRegisterInfo();

    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
      // If either operand is WSP, expand to ADD #0.
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
            .addReg(SrcRegX, RegState::Undef)
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc))
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
      }
    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
      // Materialize zero directly with MOVZ when the subtarget zeroes
      // registers for free.
      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
            .addReg(AArch64::XZR)
            .addReg(SrcRegX, RegState::Undef)
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        // Otherwise, expand to ORR WZR.
        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
            .addReg(AArch64::WZR)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }
    }
    return;
  }

  if (AArch64::GPR64spRegClass.contains(DestReg) &&
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
      // If either operand is SP, expand to ADD #0.
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
      // Materialize zero directly with MOVZ when the subtarget zeroes
      // registers for free.
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      // Otherwise, expand to ORR XZR.
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
          .addReg(AArch64::XZR)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copy a DDDD register quad by copying the individual sub-registers.
  if (AArch64::DDDDRegClass.contains(DestReg) &&
      AArch64::DDDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                       AArch64::dsub2, AArch64::dsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DDD register triple by copying the individual sub-registers.
  if (AArch64::DDDRegClass.contains(DestReg) &&
      AArch64::DDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                       AArch64::dsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DD register pair by copying the individual sub-registers.
  if (AArch64::DDRegClass.contains(DestReg) &&
      AArch64::DDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a QQQQ register quad by copying the individual sub-registers.
  if (AArch64::QQQQRegClass.contains(DestReg) &&
      AArch64::QQQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                       AArch64::qsub2, AArch64::qsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQQ register triple by copying the individual sub-registers.
  if (AArch64::QQQRegClass.contains(DestReg) &&
      AArch64::QQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                       AArch64::qsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      // Without NEON there is no 128-bit register-to-register move, so
      // bounce the value through the stack: a pre-indexed store to
      // [SP, #-16]! followed by a pre-indexed load from [SP, #16]! which
      // also restores SP.
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }

  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      // Widen to the Q super-registers and copy with a vector ORR.
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      // Widen to the Q super-registers and copy with a vector ORR.
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      // Widen to the Q super-registers and copy with a vector ORR.
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      // No 16-bit FMOV; widen to the S super-registers and use FMOVSr.
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      // Widen to the Q super-registers and copy with a vector ORR.
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      // No 8-bit FMOV; widen to the S super-registers and use FMOVSr.
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copies between GPR64 and FPR64.
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::GPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  // Copies between GPR32 and FPR32.
  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::GPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AArch64::NZCV) {
    // Write the flags from a GPR64 via MSR; NZCV is listed as an implicit
    // def so later passes see the flag write.
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    // Read the flags into a GPR64 via MRS; NZCV is listed as an implicit
    // use so the flag dependence is visible.
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

  llvm_unreachable("unimplemented reg-to-reg copy");
}
2432
// Spill SrcReg (of register class RC) to stack slot FI.
// The store opcode is selected from the spill size of RC. GPR spills of the
// "all" classes constrain virtual registers to the non-SP class, since
// WSP/SP cannot be stored directly (asserted for physical registers).
// D-/Q-register tuples are spilled with ST1 instructions, which take no
// immediate offset (Offset = false), so no trailing #0 operand is added.
// A fixed-stack memory operand is attached so later passes know the access.
void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  // Take the debug location from the instruction we insert before, if any.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
  unsigned Opc = 0;
  // Whether the chosen opcode takes a scaled immediate offset operand.
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}
2530
// Reload DestReg (of register class RC) from stack slot FI.
// Mirror image of storeRegToStackSlot: the load opcode is selected from the
// spill size of RC; GPR fills of the "all" classes constrain virtual
// registers to the non-SP class (WSP/SP cannot be loaded directly, asserted
// for physical registers); D-/Q-register tuples use LD1 instructions, which
// take no immediate offset (Offset = false). A fixed-stack memory operand
// describing the access is attached to the load.
void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  // Take the debug location from the instruction we insert before, if any.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  // Whether the chosen opcode takes a scaled immediate offset operand.
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}
2627
2628void llvm::emitFrameOffset(MachineBasicBlock &MBB,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00002629 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
Tim Northover3b0846e2014-05-24 12:50:23 +00002630 unsigned DestReg, unsigned SrcReg, int Offset,
Eric Christopherbc76b972014-06-10 17:33:39 +00002631 const TargetInstrInfo *TII,
Tim Northover3b0846e2014-05-24 12:50:23 +00002632 MachineInstr::MIFlag Flag, bool SetNZCV) {
2633 if (DestReg == SrcReg && Offset == 0)
2634 return;
2635
Geoff Berrya5335642016-05-06 16:34:59 +00002636 assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2637 "SP increment/decrement not 16-byte aligned");
2638
Tim Northover3b0846e2014-05-24 12:50:23 +00002639 bool isSub = Offset < 0;
2640 if (isSub)
2641 Offset = -Offset;
2642
2643 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2644 // scratch register. If DestReg is a virtual register, use it as the
2645 // scratch register; otherwise, create a new virtual register (to be
2646 // replaced by the scavenger at the end of PEI). That case can be optimized
2647 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2648 // register can be loaded with offset%8 and the add/sub can use an extending
2649 // instruction with LSL#3.
2650 // Currently the function handles any offsets but generates a poor sequence
2651 // of code.
2652 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2653
2654 unsigned Opc;
2655 if (SetNZCV)
2656 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2657 else
2658 Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2659 const unsigned MaxEncoding = 0xfff;
2660 const unsigned ShiftSize = 12;
2661 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2662 while (((unsigned)Offset) >= (1 << ShiftSize)) {
2663 unsigned ThisVal;
2664 if (((unsigned)Offset) > MaxEncodableValue) {
2665 ThisVal = MaxEncodableValue;
2666 } else {
2667 ThisVal = Offset & MaxEncodableValue;
2668 }
2669 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2670 "Encoding cannot handle value that big");
2671 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2672 .addReg(SrcReg)
2673 .addImm(ThisVal >> ShiftSize)
2674 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2675 .setMIFlag(Flag);
2676
2677 SrcReg = DestReg;
2678 Offset -= ThisVal;
2679 if (Offset == 0)
2680 return;
2681 }
2682 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2683 .addReg(SrcReg)
2684 .addImm(Offset)
2685 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2686 .setMIFlag(Flag);
2687}
2688
// Try to fold the spill or fill of operand Ops[0] of the COPY instruction
// MI into a direct store/load at FrameIndex, inserted before InsertPt.
// Returns the newly created memory instruction on success, or nullptr to
// let the generic spilling code handle it. As a side effect, full copies
// involving %SP constrain the virtual register's class to GPR64 so that
// the target-independent folder never tries to spill the stack pointer.
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex,
    LiveIntervals *LIS) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %vreg0<def> = COPY %SP; GPR64all:%vreg0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %vreg0 register class here.
  //
  // <rdar://problem/11522048>
  //
  if (MI.isFullCopy()) {
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned SrcReg = MI.getOperand(1).getReg();
    if (SrcReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Handle the case where a copy is being spilled or filled but the source
  // and destination register class don't match.  For example:
  //
  //   %vreg0<def> = COPY %XZR; GPR64common:%vreg0
  //
  // In this case we can still safely fold away the COPY and generate the
  // following spill code:
  //
  //   STRXui %XZR, <fi#0>
  //
  // This also eliminates spilled cross register class COPYs (e.g. between x and
  // d regs) of the same size.  For example:
  //
  //   %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
  //
  // will be filled as
  //
  //   LDRDui %vreg0, fi<#0>
  //
  // instead of
  //
  //   LDRXui %vregTemp, fi<#0>
  //   %vreg0 = FMOV %vregTemp
  //
  if (MI.isCopy() && Ops.size() == 1 &&
      // Make sure we're only folding the explicit COPY defs/uses.
      (Ops[0] == 0 || Ops[0] == 1)) {
    // Operand 0 is the COPY's def (spill of the def); operand 1 is its use
    // (fill of the use).
    bool IsSpill = Ops[0] == 0;
    bool IsFill = !IsSpill;
    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    MachineBasicBlock &MBB = *MI.getParent();
    const MachineOperand &DstMO = MI.getOperand(0);
    const MachineOperand &SrcMO = MI.getOperand(1);
    unsigned DstReg = DstMO.getReg();
    unsigned SrcReg = SrcMO.getReg();
    // This is slightly expensive to compute for physical regs since
    // getMinimalPhysRegClass is slow.
    auto getRegClass = [&](unsigned Reg) {
      return TargetRegisterInfo::isVirtualRegister(Reg)
                 ? MRI.getRegClass(Reg)
                 : TRI.getMinimalPhysRegClass(Reg);
    };

    // Full-register COPY: spill the source / fill the destination directly,
    // using that operand's own register class to pick the memory opcode.
    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
      assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
             "Mismatched register size in non subreg COPY");
      if (IsSpill)
        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
                            getRegClass(SrcReg), &TRI);
      else
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
                             getRegClass(DstReg), &TRI);
      // The store/load was inserted immediately before InsertPt; return it.
      return &*--InsertPt;
    }

    // Handle cases like spilling def of:
    //
    //   %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
    //
    // where the physical register source can be widened and stored to the full
    // virtual reg destination stack slot, in this case producing:
    //
    //   STRXui %XZR, <fi#0>
    //
    if (IsSpill && DstMO.isUndef() &&
        TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
      assert(SrcMO.getSubReg() == 0 &&
             "Unexpected subreg on physical register");
      // Find a super-register class that matches the destination subreg
      // index, so the physical source can be widened to the slot's size.
      const TargetRegisterClass *SpillRC;
      unsigned SpillSubreg;
      switch (DstMO.getSubReg()) {
      default:
        SpillRC = nullptr;
        break;
      case AArch64::sub_32:
      case AArch64::ssub:
        if (AArch64::GPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::GPR64RegClass;
          SpillSubreg = AArch64::sub_32;
        } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR64RegClass;
          SpillSubreg = AArch64::ssub;
        } else
          SpillRC = nullptr;
        break;
      case AArch64::dsub:
        if (AArch64::FPR64RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR128RegClass;
          SpillSubreg = AArch64::dsub;
        } else
          SpillRC = nullptr;
        break;
      }

      if (SpillRC)
        if (unsigned WidenedSrcReg =
                TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
          storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
                              FrameIndex, SpillRC, &TRI);
          return &*--InsertPt;
        }
    }

    // Handle cases like filling use of:
    //
    //   %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
    //
    // where we can load the full virtual reg source stack slot, into the subreg
    // destination, in this case producing:
    //
    //   LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
    //
    if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
      // Map the destination subreg index to the class to load.
      const TargetRegisterClass *FillRC;
      switch (DstMO.getSubReg()) {
      default:
        FillRC = nullptr;
        break;
      case AArch64::sub_32:
        FillRC = &AArch64::GPR32RegClass;
        break;
      case AArch64::ssub:
        FillRC = &AArch64::FPR32RegClass;
        break;
      case AArch64::dsub:
        FillRC = &AArch64::FPR64RegClass;
        break;
      }

      if (FillRC) {
        assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
                   TRI.getRegSizeInBits(*FillRC) &&
               "Mismatched regclass size on folded subreg COPY");
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
        // Rewrite the load's def operand in place to target the subreg,
        // keeping the read-undef marking from the original COPY.
        MachineInstr &LoadMI = *--InsertPt;
        MachineOperand &LoadDst = LoadMI.getOperand(0);
        assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
        LoadDst.setSubReg(DstMO.getSubReg());
        LoadDst.setIsUndef();
        return &LoadMI;
      }
    }
  }

  // Cannot fold.
  return nullptr;
}
2870
/// Determine whether the byte offset \p Offset can be encoded in the
/// immediate field of \p MI, possibly by switching to the unscaled
/// (LDUR/STUR-style) variant of its opcode.
///
/// \param MI the load/store/prefetch being inspected; its current immediate
///        operand is read and folded into \p Offset.
/// \param Offset [in,out] on entry, the extra byte offset to encode; on exit,
///        the residual byte offset that could not be folded (0 when the whole
///        offset is encodable).
/// \param OutUseUnscaledOp if non-null, set to true when the instruction must
///        be rewritten to its unscaled variant to encode the offset.
/// \param OutUnscaledOp if non-null, receives the unscaled opcode (0 when no
///        unscaled variant exists).
/// \param EmittableOffset if non-null, receives the immediate value (scaled
///        down where the encoding requires it) to place in the instruction.
/// \return AArch64FrameOffsetCannotUpdate for vector spills/fills; otherwise
///         AArch64FrameOffsetCanUpdate, or'd with AArch64FrameOffsetIsLegal
///         when the residual \p Offset is 0.
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int *EmittableOffset) {
  // Scale (in bytes) of the opcode's scaled immediate field; 1 for unscaled.
  int Scale = 1;
  bool IsSigned = false;
  // The ImmIdx should be changed case by case if it is not 2.
  unsigned ImmIdx = 2;
  unsigned UnscaledOp = 0;
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
  // Vector spills/fills can't take an immediate offset.
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
    return AArch64FrameOffsetCannotUpdate;
  // Scaled, unsigned-immediate loads/stores/prefetch: Scale is the access
  // size in bytes; each has an unscaled (9-bit signed) fallback opcode.
  case AArch64::PRFMui:
    Scale = 8;
    UnscaledOp = AArch64::PRFUMi;
    break;
  case AArch64::LDRXui:
    Scale = 8;
    UnscaledOp = AArch64::LDURXi;
    break;
  case AArch64::LDRWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURWi;
    break;
  case AArch64::LDRBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBi;
    break;
  case AArch64::LDRHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHi;
    break;
  case AArch64::LDRSui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSi;
    break;
  case AArch64::LDRDui:
    Scale = 8;
    UnscaledOp = AArch64::LDURDi;
    break;
  case AArch64::LDRQui:
    Scale = 16;
    UnscaledOp = AArch64::LDURQi;
    break;
  case AArch64::LDRBBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBBi;
    break;
  case AArch64::LDRHHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHHi;
    break;
  case AArch64::LDRSBXui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBXi;
    break;
  case AArch64::LDRSBWui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBWi;
    break;
  case AArch64::LDRSHXui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHXi;
    break;
  case AArch64::LDRSHWui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHWi;
    break;
  case AArch64::LDRSWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSWi;
    break;

  case AArch64::STRXui:
    Scale = 8;
    UnscaledOp = AArch64::STURXi;
    break;
  case AArch64::STRWui:
    Scale = 4;
    UnscaledOp = AArch64::STURWi;
    break;
  case AArch64::STRBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBi;
    break;
  case AArch64::STRHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHi;
    break;
  case AArch64::STRSui:
    Scale = 4;
    UnscaledOp = AArch64::STURSi;
    break;
  case AArch64::STRDui:
    Scale = 8;
    UnscaledOp = AArch64::STURDi;
    break;
  case AArch64::STRQui:
    Scale = 16;
    UnscaledOp = AArch64::STURQi;
    break;
  case AArch64::STRBBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBBi;
    break;
  case AArch64::STRHHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHHi;
    break;

  // Paired (and non-temporal paired) ops: signed scaled immediate, and the
  // immediate operand is at index 3 rather than 2.
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 8;
    break;
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::LDNPQi:
  case AArch64::STNPQi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 16;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 4;
    break;

  // Already-unscaled forms: byte-granular immediate, no fallback opcode.
  case AArch64::LDURXi:
  case AArch64::LDURWi:
  case AArch64::LDURBi:
  case AArch64::LDURHi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSWi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::STURBi:
  case AArch64::STURHi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
    Scale = 1;
    break;
  }

  // Fold the instruction's current (scaled) immediate into the byte offset.
  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset (and have an unscaled op to use).
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Use an unscaled addressing mode if the instruction has a negative offset
  // (or if the instruction is already using an unscaled addressing mode).
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }

  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    // Entire offset fits in the immediate field; nothing left over.
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    // Emit the saturated immediate and report the remainder (back in bytes)
    // for the caller to materialize separately.
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}
3108
3109bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3110 unsigned FrameReg, int &Offset,
3111 const AArch64InstrInfo *TII) {
3112 unsigned Opcode = MI.getOpcode();
3113 unsigned ImmIdx = FrameRegIdx + 1;
3114
3115 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3116 Offset += MI.getOperand(ImmIdx).getImm();
3117 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3118 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3119 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3120 MI.eraseFromParent();
3121 Offset = 0;
3122 return true;
3123 }
3124
3125 int NewOffset;
3126 unsigned UnscaledOp;
3127 bool UseUnscaledOp;
3128 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3129 &UnscaledOp, &NewOffset);
3130 if (Status & AArch64FrameOffsetCanUpdate) {
3131 if (Status & AArch64FrameOffsetIsLegal)
3132 // Replace the FrameIndex with FrameReg.
3133 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3134 if (UseUnscaledOp)
3135 MI.setDesc(TII->get(UnscaledOp));
3136
3137 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3138 return Offset == 0;
3139 }
3140
3141 return false;
3142}
3143
Hans Wennborg9b9a5352017-04-21 21:48:41 +00003144void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00003145 NopInst.setOpcode(AArch64::HINT);
Jim Grosbache9119e42015-05-13 18:37:00 +00003146 NopInst.addOperand(MCOperand::createImm(0));
Tim Northover3b0846e2014-05-24 12:50:23 +00003147}
Chad Rosier9d1a5562016-05-02 14:56:21 +00003148
// AArch64 supports MachineCombiner; patterns are supplied by
// getMachineCombinerPatterns below.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }
Eugene Zelenko049b0172017-01-06 00:30:53 +00003151
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003152// True when Opc sets flag
3153static bool isCombineInstrSettingFlag(unsigned Opc) {
3154 switch (Opc) {
3155 case AArch64::ADDSWrr:
3156 case AArch64::ADDSWri:
3157 case AArch64::ADDSXrr:
3158 case AArch64::ADDSXri:
3159 case AArch64::SUBSWrr:
3160 case AArch64::SUBSXrr:
3161 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3162 case AArch64::SUBSWri:
3163 case AArch64::SUBSXri:
3164 return true;
3165 default:
3166 break;
3167 }
3168 return false;
3169}
Eugene Zelenko049b0172017-01-06 00:30:53 +00003170
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003171// 32b Opcodes that can be combined with a MUL
3172static bool isCombineInstrCandidate32(unsigned Opc) {
3173 switch (Opc) {
3174 case AArch64::ADDWrr:
3175 case AArch64::ADDWri:
3176 case AArch64::SUBWrr:
3177 case AArch64::ADDSWrr:
3178 case AArch64::ADDSWri:
3179 case AArch64::SUBSWrr:
3180 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3181 case AArch64::SUBWri:
3182 case AArch64::SUBSWri:
3183 return true;
3184 default:
3185 break;
3186 }
3187 return false;
3188}
Eugene Zelenko049b0172017-01-06 00:30:53 +00003189
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003190// 64b Opcodes that can be combined with a MUL
3191static bool isCombineInstrCandidate64(unsigned Opc) {
3192 switch (Opc) {
3193 case AArch64::ADDXrr:
3194 case AArch64::ADDXri:
3195 case AArch64::SUBXrr:
3196 case AArch64::ADDSXrr:
3197 case AArch64::ADDSXri:
3198 case AArch64::SUBSXrr:
3199 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3200 case AArch64::SUBXri:
3201 case AArch64::SUBSXri:
3202 return true;
3203 default:
3204 break;
3205 }
3206 return false;
3207}
Eugene Zelenko049b0172017-01-06 00:30:53 +00003208
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003209// FP Opcodes that can be combined with a FMUL
3210static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3211 switch (Inst.getOpcode()) {
Evandro Menezes19b2aed2016-09-15 19:55:23 +00003212 default:
3213 break;
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003214 case AArch64::FADDSrr:
3215 case AArch64::FADDDrr:
3216 case AArch64::FADDv2f32:
3217 case AArch64::FADDv2f64:
3218 case AArch64::FADDv4f32:
3219 case AArch64::FSUBSrr:
3220 case AArch64::FSUBDrr:
3221 case AArch64::FSUBv2f32:
3222 case AArch64::FSUBv2f64:
3223 case AArch64::FSUBv4f32:
Logan Chience542ee2017-01-05 23:41:33 +00003224 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3225 return (Options.UnsafeFPMath ||
3226 Options.AllowFPOpFusion == FPOpFusion::Fast);
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003227 }
3228 return false;
3229}
Eugene Zelenko049b0172017-01-06 00:30:53 +00003230
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003231// Opcodes that can be combined with a MUL
3232static bool isCombineInstrCandidate(unsigned Opc) {
3233 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3234}
3235
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003236//
3237// Utility routine that checks if \param MO is defined by an
3238// \param CombineOpc instruction in the basic block \param MBB
3239static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3240 unsigned CombineOpc, unsigned ZeroReg = 0,
3241 bool CheckZeroReg = false) {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003242 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3243 MachineInstr *MI = nullptr;
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003244
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003245 if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3246 MI = MRI.getUniqueVRegDef(MO.getReg());
3247 // And it needs to be in the trace (otherwise, it won't have a depth).
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003248 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003249 return false;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003250 // Must only used by the user we combine with.
Gerolf Hoflehnerfe2c11f2014-08-13 22:07:36 +00003251 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003252 return false;
3253
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003254 if (CheckZeroReg) {
3255 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3256 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3257 MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
3258 // The third input reg must be zero.
3259 if (MI->getOperand(3).getReg() != ZeroReg)
3260 return false;
3261 }
3262
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003263 return true;
3264}
3265
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003266//
3267// Is \param MO defined by an integer multiply and can be combined?
3268static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3269 unsigned MulOpc, unsigned ZeroReg) {
3270 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3271}
3272
3273//
3274// Is \param MO defined by a floating-point multiply and can be combined?
3275static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3276 unsigned MulOpc) {
3277 return canCombine(MBB, MO, MulOpc);
3278}
3279
Haicheng Wu08b94622016-01-07 04:01:02 +00003280// TODO: There are many more machine instruction opcodes to match:
3281// 1. Other data types (integer, vectors)
3282// 2. Other math / logic operations (xor, or)
3283// 3. Other forms of the same operation (intrinsics and other variants)
Jessica Paquette809d7082017-07-28 03:21:58 +00003284bool AArch64InstrInfo::isAssociativeAndCommutative(
3285 const MachineInstr &Inst) const {
Haicheng Wu08b94622016-01-07 04:01:02 +00003286 switch (Inst.getOpcode()) {
3287 case AArch64::FADDDrr:
3288 case AArch64::FADDSrr:
3289 case AArch64::FADDv2f32:
3290 case AArch64::FADDv2f64:
3291 case AArch64::FADDv4f32:
3292 case AArch64::FMULDrr:
3293 case AArch64::FMULSrr:
3294 case AArch64::FMULX32:
3295 case AArch64::FMULX64:
3296 case AArch64::FMULXv2f32:
3297 case AArch64::FMULXv2f64:
3298 case AArch64::FMULXv4f32:
3299 case AArch64::FMULv2f32:
3300 case AArch64::FMULv2f64:
3301 case AArch64::FMULv4f32:
3302 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3303 default:
3304 return false;
3305 }
3306}
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003307
Haicheng Wu08b94622016-01-07 04:01:02 +00003308/// Find instructions that can be turned into madd.
3309static bool getMaddPatterns(MachineInstr &Root,
3310 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003311 unsigned Opc = Root.getOpcode();
3312 MachineBasicBlock &MBB = *Root.getParent();
3313 bool Found = false;
3314
3315 if (!isCombineInstrCandidate(Opc))
Chad Rosier85c85942016-03-23 20:07:28 +00003316 return false;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003317 if (isCombineInstrSettingFlag(Opc)) {
3318 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3319 // When NZCV is live bail out.
3320 if (Cmp_NZCV == -1)
Chad Rosier85c85942016-03-23 20:07:28 +00003321 return false;
Chad Rosier6db9ff62017-06-23 19:20:12 +00003322 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003323 // When opcode can't change bail out.
3324 // CHECKME: do we miss any cases for opcode conversion?
3325 if (NewOpc == Opc)
Chad Rosier85c85942016-03-23 20:07:28 +00003326 return false;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003327 Opc = NewOpc;
3328 }
3329
3330 switch (Opc) {
3331 default:
3332 break;
3333 case AArch64::ADDWrr:
3334 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3335 "ADDWrr does not have register operands");
3336 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3337 AArch64::WZR)) {
Sanjay Patel387e66e2015-11-05 19:34:57 +00003338 Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003339 Found = true;
3340 }
3341 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3342 AArch64::WZR)) {
Sanjay Patel387e66e2015-11-05 19:34:57 +00003343 Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003344 Found = true;
3345 }
3346 break;
3347 case AArch64::ADDXrr:
3348 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3349 AArch64::XZR)) {
Sanjay Patel387e66e2015-11-05 19:34:57 +00003350 Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003351 Found = true;
3352 }
3353 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3354 AArch64::XZR)) {
Sanjay Patel387e66e2015-11-05 19:34:57 +00003355 Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003356 Found = true;
3357 }
3358 break;
3359 case AArch64::SUBWrr:
3360 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3361 AArch64::WZR)) {
Sanjay Patel387e66e2015-11-05 19:34:57 +00003362 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003363 Found = true;
3364 }
3365 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3366 AArch64::WZR)) {
Sanjay Patel387e66e2015-11-05 19:34:57 +00003367 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003368 Found = true;
3369 }
3370 break;
3371 case AArch64::SUBXrr:
3372 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3373 AArch64::XZR)) {
Sanjay Patel387e66e2015-11-05 19:34:57 +00003374 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003375 Found = true;
3376 }
3377 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3378 AArch64::XZR)) {
Sanjay Patel387e66e2015-11-05 19:34:57 +00003379 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003380 Found = true;
3381 }
3382 break;
3383 case AArch64::ADDWri:
3384 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3385 AArch64::WZR)) {
Sanjay Patel387e66e2015-11-05 19:34:57 +00003386 Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003387 Found = true;
3388 }
3389 break;
3390 case AArch64::ADDXri:
3391 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3392 AArch64::XZR)) {
Sanjay Patel387e66e2015-11-05 19:34:57 +00003393 Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003394 Found = true;
3395 }
3396 break;
3397 case AArch64::SUBWri:
3398 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3399 AArch64::WZR)) {
Sanjay Patel387e66e2015-11-05 19:34:57 +00003400 Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003401 Found = true;
3402 }
3403 break;
3404 case AArch64::SUBXri:
3405 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3406 AArch64::XZR)) {
Sanjay Patel387e66e2015-11-05 19:34:57 +00003407 Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003408 Found = true;
3409 }
3410 break;
3411 }
3412 return Found;
3413}
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003414/// Floating-Point Support
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003415
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003416/// Find instructions that can be turned into madd.
3417static bool getFMAPatterns(MachineInstr &Root,
3418 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3419
3420 if (!isCombineInstrCandidateFP(Root))
Eugene Zelenko049b0172017-01-06 00:30:53 +00003421 return false;
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003422
3423 MachineBasicBlock &MBB = *Root.getParent();
3424 bool Found = false;
3425
3426 switch (Root.getOpcode()) {
3427 default:
3428 assert(false && "Unsupported FP instruction in combiner\n");
3429 break;
3430 case AArch64::FADDSrr:
3431 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3432 "FADDWrr does not have register operands");
3433 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3434 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
3435 Found = true;
3436 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3437 AArch64::FMULv1i32_indexed)) {
3438 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
3439 Found = true;
3440 }
3441 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3442 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
3443 Found = true;
3444 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3445 AArch64::FMULv1i32_indexed)) {
3446 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
3447 Found = true;
3448 }
3449 break;
3450 case AArch64::FADDDrr:
3451 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3452 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
3453 Found = true;
3454 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3455 AArch64::FMULv1i64_indexed)) {
3456 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
3457 Found = true;
3458 }
3459 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3460 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
3461 Found = true;
3462 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3463 AArch64::FMULv1i64_indexed)) {
3464 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
3465 Found = true;
3466 }
3467 break;
3468 case AArch64::FADDv2f32:
3469 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3470 AArch64::FMULv2i32_indexed)) {
3471 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
3472 Found = true;
3473 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3474 AArch64::FMULv2f32)) {
3475 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
3476 Found = true;
3477 }
3478 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3479 AArch64::FMULv2i32_indexed)) {
3480 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
3481 Found = true;
3482 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3483 AArch64::FMULv2f32)) {
3484 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
3485 Found = true;
3486 }
3487 break;
3488 case AArch64::FADDv2f64:
3489 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3490 AArch64::FMULv2i64_indexed)) {
3491 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
3492 Found = true;
3493 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3494 AArch64::FMULv2f64)) {
3495 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
3496 Found = true;
3497 }
3498 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3499 AArch64::FMULv2i64_indexed)) {
3500 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
3501 Found = true;
3502 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3503 AArch64::FMULv2f64)) {
3504 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
3505 Found = true;
3506 }
3507 break;
3508 case AArch64::FADDv4f32:
3509 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3510 AArch64::FMULv4i32_indexed)) {
3511 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
3512 Found = true;
3513 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3514 AArch64::FMULv4f32)) {
3515 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
3516 Found = true;
3517 }
3518 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3519 AArch64::FMULv4i32_indexed)) {
3520 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
3521 Found = true;
3522 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3523 AArch64::FMULv4f32)) {
3524 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
3525 Found = true;
3526 }
3527 break;
3528
3529 case AArch64::FSUBSrr:
3530 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3531 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
3532 Found = true;
3533 }
3534 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3535 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
3536 Found = true;
3537 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3538 AArch64::FMULv1i32_indexed)) {
3539 Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
3540 Found = true;
3541 }
Chad Rosieraeffffd2017-05-11 20:07:24 +00003542 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3543 Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
3544 Found = true;
3545 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003546 break;
3547 case AArch64::FSUBDrr:
3548 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3549 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
3550 Found = true;
3551 }
3552 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3553 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
3554 Found = true;
3555 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3556 AArch64::FMULv1i64_indexed)) {
3557 Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
3558 Found = true;
3559 }
Chad Rosieraeffffd2017-05-11 20:07:24 +00003560 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3561 Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
3562 Found = true;
3563 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003564 break;
3565 case AArch64::FSUBv2f32:
3566 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3567 AArch64::FMULv2i32_indexed)) {
3568 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
3569 Found = true;
3570 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3571 AArch64::FMULv2f32)) {
3572 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
3573 Found = true;
3574 }
3575 break;
3576 case AArch64::FSUBv2f64:
3577 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3578 AArch64::FMULv2i64_indexed)) {
3579 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
3580 Found = true;
3581 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3582 AArch64::FMULv2f64)) {
3583 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
3584 Found = true;
3585 }
3586 break;
3587 case AArch64::FSUBv4f32:
3588 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3589 AArch64::FMULv4i32_indexed)) {
3590 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
3591 Found = true;
3592 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3593 AArch64::FMULv4f32)) {
3594 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
3595 Found = true;
3596 }
3597 break;
3598 }
3599 return Found;
3600}
3601
3602/// Return true when a code sequence can improve throughput. It
3603/// should be called only for instructions in loops.
3604/// \param Pattern - combiner pattern
Jessica Paquette809d7082017-07-28 03:21:58 +00003605bool AArch64InstrInfo::isThroughputPattern(
3606 MachineCombinerPattern Pattern) const {
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003607 switch (Pattern) {
3608 default:
3609 break;
3610 case MachineCombinerPattern::FMULADDS_OP1:
3611 case MachineCombinerPattern::FMULADDS_OP2:
3612 case MachineCombinerPattern::FMULSUBS_OP1:
3613 case MachineCombinerPattern::FMULSUBS_OP2:
3614 case MachineCombinerPattern::FMULADDD_OP1:
3615 case MachineCombinerPattern::FMULADDD_OP2:
3616 case MachineCombinerPattern::FMULSUBD_OP1:
3617 case MachineCombinerPattern::FMULSUBD_OP2:
Chad Rosieraeffffd2017-05-11 20:07:24 +00003618 case MachineCombinerPattern::FNMULSUBS_OP1:
3619 case MachineCombinerPattern::FNMULSUBD_OP1:
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003620 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3621 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3622 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3623 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3624 case MachineCombinerPattern::FMLAv2f32_OP2:
3625 case MachineCombinerPattern::FMLAv2f32_OP1:
3626 case MachineCombinerPattern::FMLAv2f64_OP1:
3627 case MachineCombinerPattern::FMLAv2f64_OP2:
3628 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3629 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3630 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3631 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3632 case MachineCombinerPattern::FMLAv4f32_OP1:
3633 case MachineCombinerPattern::FMLAv4f32_OP2:
3634 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3635 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3636 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
3637 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
3638 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
3639 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
3640 case MachineCombinerPattern::FMLSv2f32_OP2:
3641 case MachineCombinerPattern::FMLSv2f64_OP2:
3642 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
3643 case MachineCombinerPattern::FMLSv4f32_OP2:
3644 return true;
3645 } // end switch (Pattern)
3646 return false;
3647}
Haicheng Wu08b94622016-01-07 04:01:02 +00003648/// Return true when there is potentially a faster code sequence for an
3649/// instruction chain ending in \p Root. All potential patterns are listed in
3650/// the \p Pattern vector. Pattern should be sorted in priority order since the
3651/// pattern evaluator stops checking as soon as it finds a faster sequence.
3652
3653bool AArch64InstrInfo::getMachineCombinerPatterns(
3654 MachineInstr &Root,
3655 SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003656 // Integer patterns
Haicheng Wu08b94622016-01-07 04:01:02 +00003657 if (getMaddPatterns(Root, Patterns))
3658 return true;
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003659 // Floating point patterns
3660 if (getFMAPatterns(Root, Patterns))
3661 return true;
Haicheng Wu08b94622016-01-07 04:01:02 +00003662
3663 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
3664}
3665
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003666enum class FMAInstKind { Default, Indexed, Accumulator };
3667/// genFusedMultiply - Generate fused multiply instructions.
3668/// This function supports both integer and floating point instructions.
3669/// A typical example:
3670/// F|MUL I=A,B,0
3671/// F|ADD R,I,C
3672/// ==> F|MADD R,A,B,C
Joel Jones7466ccf2017-07-10 22:11:50 +00003673/// \param MF Containing MachineFunction
3674/// \param MRI Register information
3675/// \param TII Target information
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003676/// \param Root is the F|ADD instruction
NAKAMURA Takumi40da2672014-08-08 02:04:18 +00003677/// \param [out] InsInstrs is a vector of machine instructions and will
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003678/// contain the generated madd instruction
3679/// \param IdxMulOpd is index of operand in Root that is the result of
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003680/// the F|MUL. In the example above IdxMulOpd is 1.
3681/// \param MaddOpc the opcode fo the f|madd instruction
Joel Jones7466ccf2017-07-10 22:11:50 +00003682/// \param RC Register class of operands
3683/// \param kind of fma instruction (addressing mode) to be generated
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003684static MachineInstr *
3685genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3686 const TargetInstrInfo *TII, MachineInstr &Root,
3687 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3688 unsigned MaddOpc, const TargetRegisterClass *RC,
3689 FMAInstKind kind = FMAInstKind::Default) {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003690 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3691
3692 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3693 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003694 unsigned ResultReg = Root.getOperand(0).getReg();
3695 unsigned SrcReg0 = MUL->getOperand(1).getReg();
3696 bool Src0IsKill = MUL->getOperand(1).isKill();
3697 unsigned SrcReg1 = MUL->getOperand(2).getReg();
3698 bool Src1IsKill = MUL->getOperand(2).isKill();
3699 unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3700 bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3701
3702 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3703 MRI.constrainRegClass(ResultReg, RC);
3704 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3705 MRI.constrainRegClass(SrcReg0, RC);
3706 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3707 MRI.constrainRegClass(SrcReg1, RC);
3708 if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
3709 MRI.constrainRegClass(SrcReg2, RC);
3710
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003711 MachineInstrBuilder MIB;
3712 if (kind == FMAInstKind::Default)
3713 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3714 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3715 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3716 .addReg(SrcReg2, getKillRegState(Src2IsKill));
3717 else if (kind == FMAInstKind::Indexed)
3718 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3719 .addReg(SrcReg2, getKillRegState(Src2IsKill))
3720 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3721 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3722 .addImm(MUL->getOperand(3).getImm());
3723 else if (kind == FMAInstKind::Accumulator)
3724 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3725 .addReg(SrcReg2, getKillRegState(Src2IsKill))
3726 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3727 .addReg(SrcReg1, getKillRegState(Src1IsKill));
3728 else
3729 assert(false && "Invalid FMA instruction kind \n");
3730 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003731 InsInstrs.push_back(MIB);
3732 return MUL;
3733}
3734
3735/// genMaddR - Generate madd instruction and combine mul and add using
3736/// an extra virtual register
3737/// Example - an ADD intermediate needs to be stored in a register:
3738/// MUL I=A,B,0
3739/// ADD R,I,Imm
3740/// ==> ORR V, ZR, Imm
3741/// ==> MADD R,A,B,V
Joel Jones7466ccf2017-07-10 22:11:50 +00003742/// \param MF Containing MachineFunction
3743/// \param MRI Register information
3744/// \param TII Target information
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003745/// \param Root is the ADD instruction
NAKAMURA Takumi40da2672014-08-08 02:04:18 +00003746/// \param [out] InsInstrs is a vector of machine instructions and will
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003747/// contain the generated madd instruction
3748/// \param IdxMulOpd is index of operand in Root that is the result of
3749/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
3751/// \param VR is a virtual register that holds the value of an ADD operand
3752/// (V in the example above).
Joel Jones7466ccf2017-07-10 22:11:50 +00003753/// \param RC Register class of operands
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003754static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
3755 const TargetInstrInfo *TII, MachineInstr &Root,
3756 SmallVectorImpl<MachineInstr *> &InsInstrs,
Jessica Paquette809d7082017-07-28 03:21:58 +00003757 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
3758 const TargetRegisterClass *RC) {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003759 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3760
3761 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003762 unsigned ResultReg = Root.getOperand(0).getReg();
3763 unsigned SrcReg0 = MUL->getOperand(1).getReg();
3764 bool Src0IsKill = MUL->getOperand(1).isKill();
3765 unsigned SrcReg1 = MUL->getOperand(2).getReg();
3766 bool Src1IsKill = MUL->getOperand(2).isKill();
3767
3768 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3769 MRI.constrainRegClass(ResultReg, RC);
3770 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3771 MRI.constrainRegClass(SrcReg0, RC);
3772 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3773 MRI.constrainRegClass(SrcReg1, RC);
3774 if (TargetRegisterInfo::isVirtualRegister(VR))
3775 MRI.constrainRegClass(VR, RC);
3776
Jessica Paquette809d7082017-07-28 03:21:58 +00003777 MachineInstrBuilder MIB =
3778 BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3779 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3780 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3781 .addReg(VR);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003782 // Insert the MADD
3783 InsInstrs.push_back(MIB);
3784 return MUL;
3785}
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003786
Sanjay Patelcfe03932015-06-19 23:21:42 +00003787/// When getMachineCombinerPatterns() finds potential patterns,
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003788/// this function generates the instructions that could replace the
3789/// original code sequence
3790void AArch64InstrInfo::genAlternativeCodeSequence(
Sanjay Patel387e66e2015-11-05 19:34:57 +00003791 MachineInstr &Root, MachineCombinerPattern Pattern,
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003792 SmallVectorImpl<MachineInstr *> &InsInstrs,
3793 SmallVectorImpl<MachineInstr *> &DelInstrs,
3794 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
3795 MachineBasicBlock &MBB = *Root.getParent();
3796 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3797 MachineFunction &MF = *MBB.getParent();
Eric Christophere0818912014-09-03 20:36:26 +00003798 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003799
3800 MachineInstr *MUL;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003801 const TargetRegisterClass *RC;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003802 unsigned Opc;
3803 switch (Pattern) {
3804 default:
Haicheng Wu08b94622016-01-07 04:01:02 +00003805 // Reassociate instructions.
3806 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
3807 DelInstrs, InstrIdxForVirtReg);
3808 return;
Sanjay Patel387e66e2015-11-05 19:34:57 +00003809 case MachineCombinerPattern::MULADDW_OP1:
3810 case MachineCombinerPattern::MULADDX_OP1:
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003811 // MUL I=A,B,0
3812 // ADD R,I,C
3813 // ==> MADD R,A,B,C
3814 // --- Create(MADD);
Sanjay Patel387e66e2015-11-05 19:34:57 +00003815 if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003816 Opc = AArch64::MADDWrrr;
3817 RC = &AArch64::GPR32RegClass;
3818 } else {
3819 Opc = AArch64::MADDXrrr;
3820 RC = &AArch64::GPR64RegClass;
3821 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003822 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003823 break;
Sanjay Patel387e66e2015-11-05 19:34:57 +00003824 case MachineCombinerPattern::MULADDW_OP2:
3825 case MachineCombinerPattern::MULADDX_OP2:
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003826 // MUL I=A,B,0
3827 // ADD R,C,I
3828 // ==> MADD R,A,B,C
3829 // --- Create(MADD);
Sanjay Patel387e66e2015-11-05 19:34:57 +00003830 if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003831 Opc = AArch64::MADDWrrr;
3832 RC = &AArch64::GPR32RegClass;
3833 } else {
3834 Opc = AArch64::MADDXrrr;
3835 RC = &AArch64::GPR64RegClass;
3836 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003837 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003838 break;
Sanjay Patel387e66e2015-11-05 19:34:57 +00003839 case MachineCombinerPattern::MULADDWI_OP1:
3840 case MachineCombinerPattern::MULADDXI_OP1: {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003841 // MUL I=A,B,0
3842 // ADD R,I,Imm
3843 // ==> ORR V, ZR, Imm
3844 // ==> MADD R,A,B,V
3845 // --- Create(MADD);
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003846 const TargetRegisterClass *OrrRC;
3847 unsigned BitSize, OrrOpc, ZeroReg;
Sanjay Patel387e66e2015-11-05 19:34:57 +00003848 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003849 OrrOpc = AArch64::ORRWri;
3850 OrrRC = &AArch64::GPR32spRegClass;
3851 BitSize = 32;
3852 ZeroReg = AArch64::WZR;
3853 Opc = AArch64::MADDWrrr;
3854 RC = &AArch64::GPR32RegClass;
3855 } else {
3856 OrrOpc = AArch64::ORRXri;
3857 OrrRC = &AArch64::GPR64spRegClass;
3858 BitSize = 64;
3859 ZeroReg = AArch64::XZR;
3860 Opc = AArch64::MADDXrrr;
3861 RC = &AArch64::GPR64RegClass;
3862 }
3863 unsigned NewVR = MRI.createVirtualRegister(OrrRC);
3864 uint64_t Imm = Root.getOperand(2).getImm();
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003865
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003866 if (Root.getOperand(3).isImm()) {
3867 unsigned Val = Root.getOperand(3).getImm();
3868 Imm = Imm << Val;
3869 }
David Majnemer1182dd82016-07-21 23:46:56 +00003870 uint64_t UImm = SignExtend64(Imm, BitSize);
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003871 uint64_t Encoding;
3872 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
3873 MachineInstrBuilder MIB1 =
3874 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
3875 .addReg(ZeroReg)
3876 .addImm(Encoding);
3877 InsInstrs.push_back(MIB1);
3878 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
3879 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003880 }
3881 break;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003882 }
Sanjay Patel387e66e2015-11-05 19:34:57 +00003883 case MachineCombinerPattern::MULSUBW_OP1:
3884 case MachineCombinerPattern::MULSUBX_OP1: {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003885 // MUL I=A,B,0
3886 // SUB R,I, C
3887 // ==> SUB V, 0, C
3888 // ==> MADD R,A,B,V // = -C + A*B
3889 // --- Create(MADD);
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003890 const TargetRegisterClass *SubRC;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003891 unsigned SubOpc, ZeroReg;
Sanjay Patel387e66e2015-11-05 19:34:57 +00003892 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003893 SubOpc = AArch64::SUBWrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003894 SubRC = &AArch64::GPR32spRegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003895 ZeroReg = AArch64::WZR;
3896 Opc = AArch64::MADDWrrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003897 RC = &AArch64::GPR32RegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003898 } else {
3899 SubOpc = AArch64::SUBXrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003900 SubRC = &AArch64::GPR64spRegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003901 ZeroReg = AArch64::XZR;
3902 Opc = AArch64::MADDXrrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003903 RC = &AArch64::GPR64RegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003904 }
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003905 unsigned NewVR = MRI.createVirtualRegister(SubRC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003906 // SUB NewVR, 0, C
3907 MachineInstrBuilder MIB1 =
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003908 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003909 .addReg(ZeroReg)
Diana Picus116bbab2017-01-13 09:58:52 +00003910 .add(Root.getOperand(2));
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003911 InsInstrs.push_back(MIB1);
3912 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003913 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
3914 break;
3915 }
Sanjay Patel387e66e2015-11-05 19:34:57 +00003916 case MachineCombinerPattern::MULSUBW_OP2:
3917 case MachineCombinerPattern::MULSUBX_OP2:
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003918 // MUL I=A,B,0
3919 // SUB R,C,I
3920 // ==> MSUB R,A,B,C (computes C - A*B)
3921 // --- Create(MSUB);
Sanjay Patel387e66e2015-11-05 19:34:57 +00003922 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003923 Opc = AArch64::MSUBWrrr;
3924 RC = &AArch64::GPR32RegClass;
3925 } else {
3926 Opc = AArch64::MSUBXrrr;
3927 RC = &AArch64::GPR64RegClass;
3928 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003929 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003930 break;
Sanjay Patel387e66e2015-11-05 19:34:57 +00003931 case MachineCombinerPattern::MULSUBWI_OP1:
3932 case MachineCombinerPattern::MULSUBXI_OP1: {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003933 // MUL I=A,B,0
3934 // SUB R,I, Imm
3935 // ==> ORR V, ZR, -Imm
3936 // ==> MADD R,A,B,V // = -Imm + A*B
3937 // --- Create(MADD);
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003938 const TargetRegisterClass *OrrRC;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003939 unsigned BitSize, OrrOpc, ZeroReg;
Sanjay Patel387e66e2015-11-05 19:34:57 +00003940 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
Juergen Ributzka25816b02014-08-30 06:16:26 +00003941 OrrOpc = AArch64::ORRWri;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003942 OrrRC = &AArch64::GPR32spRegClass;
3943 BitSize = 32;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003944 ZeroReg = AArch64::WZR;
3945 Opc = AArch64::MADDWrrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003946 RC = &AArch64::GPR32RegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003947 } else {
3948 OrrOpc = AArch64::ORRXri;
Juergen Ributzkaf9660f02014-11-04 22:20:07 +00003949 OrrRC = &AArch64::GPR64spRegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003950 BitSize = 64;
3951 ZeroReg = AArch64::XZR;
3952 Opc = AArch64::MADDXrrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003953 RC = &AArch64::GPR64RegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003954 }
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003955 unsigned NewVR = MRI.createVirtualRegister(OrrRC);
David Majnemer1182dd82016-07-21 23:46:56 +00003956 uint64_t Imm = Root.getOperand(2).getImm();
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003957 if (Root.getOperand(3).isImm()) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003958 unsigned Val = Root.getOperand(3).getImm();
3959 Imm = Imm << Val;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003960 }
David Majnemer1182dd82016-07-21 23:46:56 +00003961 uint64_t UImm = SignExtend64(-Imm, BitSize);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003962 uint64_t Encoding;
3963 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
3964 MachineInstrBuilder MIB1 =
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003965 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003966 .addReg(ZeroReg)
3967 .addImm(Encoding);
3968 InsInstrs.push_back(MIB1);
3969 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003970 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003971 }
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00003972 break;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003973 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00003974 // Floating Point Support
3975 case MachineCombinerPattern::FMULADDS_OP1:
3976 case MachineCombinerPattern::FMULADDD_OP1:
3977 // MUL I=A,B,0
3978 // ADD R,I,C
3979 // ==> MADD R,A,B,C
3980 // --- Create(MADD);
3981 if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
3982 Opc = AArch64::FMADDSrrr;
3983 RC = &AArch64::FPR32RegClass;
3984 } else {
3985 Opc = AArch64::FMADDDrrr;
3986 RC = &AArch64::FPR64RegClass;
3987 }
3988 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3989 break;
3990 case MachineCombinerPattern::FMULADDS_OP2:
3991 case MachineCombinerPattern::FMULADDD_OP2:
3992 // FMUL I=A,B,0
3993 // FADD R,C,I
3994 // ==> FMADD R,A,B,C
3995 // --- Create(FMADD);
3996 if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
3997 Opc = AArch64::FMADDSrrr;
3998 RC = &AArch64::FPR32RegClass;
3999 } else {
4000 Opc = AArch64::FMADDDrrr;
4001 RC = &AArch64::FPR64RegClass;
4002 }
4003 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4004 break;
4005
4006 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4007 Opc = AArch64::FMLAv1i32_indexed;
4008 RC = &AArch64::FPR32RegClass;
4009 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4010 FMAInstKind::Indexed);
4011 break;
4012 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4013 Opc = AArch64::FMLAv1i32_indexed;
4014 RC = &AArch64::FPR32RegClass;
4015 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4016 FMAInstKind::Indexed);
4017 break;
4018
4019 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4020 Opc = AArch64::FMLAv1i64_indexed;
4021 RC = &AArch64::FPR64RegClass;
4022 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4023 FMAInstKind::Indexed);
4024 break;
4025 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4026 Opc = AArch64::FMLAv1i64_indexed;
4027 RC = &AArch64::FPR64RegClass;
4028 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4029 FMAInstKind::Indexed);
4030 break;
4031
4032 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4033 case MachineCombinerPattern::FMLAv2f32_OP1:
4034 RC = &AArch64::FPR64RegClass;
4035 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4036 Opc = AArch64::FMLAv2i32_indexed;
4037 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4038 FMAInstKind::Indexed);
4039 } else {
4040 Opc = AArch64::FMLAv2f32;
4041 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4042 FMAInstKind::Accumulator);
4043 }
4044 break;
4045 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4046 case MachineCombinerPattern::FMLAv2f32_OP2:
4047 RC = &AArch64::FPR64RegClass;
4048 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4049 Opc = AArch64::FMLAv2i32_indexed;
4050 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4051 FMAInstKind::Indexed);
4052 } else {
4053 Opc = AArch64::FMLAv2f32;
4054 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4055 FMAInstKind::Accumulator);
4056 }
4057 break;
4058
4059 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4060 case MachineCombinerPattern::FMLAv2f64_OP1:
4061 RC = &AArch64::FPR128RegClass;
4062 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4063 Opc = AArch64::FMLAv2i64_indexed;
4064 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4065 FMAInstKind::Indexed);
4066 } else {
4067 Opc = AArch64::FMLAv2f64;
4068 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4069 FMAInstKind::Accumulator);
4070 }
4071 break;
4072 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4073 case MachineCombinerPattern::FMLAv2f64_OP2:
4074 RC = &AArch64::FPR128RegClass;
4075 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4076 Opc = AArch64::FMLAv2i64_indexed;
4077 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4078 FMAInstKind::Indexed);
4079 } else {
4080 Opc = AArch64::FMLAv2f64;
4081 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4082 FMAInstKind::Accumulator);
4083 }
4084 break;
4085
4086 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4087 case MachineCombinerPattern::FMLAv4f32_OP1:
4088 RC = &AArch64::FPR128RegClass;
4089 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
4090 Opc = AArch64::FMLAv4i32_indexed;
4091 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4092 FMAInstKind::Indexed);
4093 } else {
4094 Opc = AArch64::FMLAv4f32;
4095 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4096 FMAInstKind::Accumulator);
4097 }
4098 break;
4099
4100 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4101 case MachineCombinerPattern::FMLAv4f32_OP2:
4102 RC = &AArch64::FPR128RegClass;
4103 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
4104 Opc = AArch64::FMLAv4i32_indexed;
4105 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4106 FMAInstKind::Indexed);
4107 } else {
4108 Opc = AArch64::FMLAv4f32;
4109 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4110 FMAInstKind::Accumulator);
4111 }
4112 break;
4113
4114 case MachineCombinerPattern::FMULSUBS_OP1:
4115 case MachineCombinerPattern::FMULSUBD_OP1: {
4116 // FMUL I=A,B,0
4117 // FSUB R,I,C
4118 // ==> FNMSUB R,A,B,C // = -C + A*B
4119 // --- Create(FNMSUB);
4120 if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4121 Opc = AArch64::FNMSUBSrrr;
4122 RC = &AArch64::FPR32RegClass;
4123 } else {
4124 Opc = AArch64::FNMSUBDrrr;
4125 RC = &AArch64::FPR64RegClass;
4126 }
4127 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4128 break;
4129 }
Chad Rosieraeffffd2017-05-11 20:07:24 +00004130
4131 case MachineCombinerPattern::FNMULSUBS_OP1:
4132 case MachineCombinerPattern::FNMULSUBD_OP1: {
4133 // FNMUL I=A,B,0
4134 // FSUB R,I,C
4135 // ==> FNMADD R,A,B,C // = -A*B - C
4136 // --- Create(FNMADD);
4137 if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4138 Opc = AArch64::FNMADDSrrr;
4139 RC = &AArch64::FPR32RegClass;
4140 } else {
4141 Opc = AArch64::FNMADDDrrr;
4142 RC = &AArch64::FPR64RegClass;
4143 }
4144 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4145 break;
4146 }
4147
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004148 case MachineCombinerPattern::FMULSUBS_OP2:
4149 case MachineCombinerPattern::FMULSUBD_OP2: {
4150 // FMUL I=A,B,0
4151 // FSUB R,C,I
4152 // ==> FMSUB R,A,B,C (computes C - A*B)
4153 // --- Create(FMSUB);
4154 if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4155 Opc = AArch64::FMSUBSrrr;
4156 RC = &AArch64::FPR32RegClass;
4157 } else {
4158 Opc = AArch64::FMSUBDrrr;
4159 RC = &AArch64::FPR64RegClass;
4160 }
4161 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4162 break;
Chad Rosier8b12a032017-05-16 12:43:23 +00004163 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004164
4165 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4166 Opc = AArch64::FMLSv1i32_indexed;
4167 RC = &AArch64::FPR32RegClass;
4168 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4169 FMAInstKind::Indexed);
4170 break;
4171
4172 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4173 Opc = AArch64::FMLSv1i64_indexed;
4174 RC = &AArch64::FPR64RegClass;
4175 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4176 FMAInstKind::Indexed);
4177 break;
4178
4179 case MachineCombinerPattern::FMLSv2f32_OP2:
4180 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4181 RC = &AArch64::FPR64RegClass;
4182 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
4183 Opc = AArch64::FMLSv2i32_indexed;
4184 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4185 FMAInstKind::Indexed);
4186 } else {
4187 Opc = AArch64::FMLSv2f32;
4188 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4189 FMAInstKind::Accumulator);
4190 }
4191 break;
4192
4193 case MachineCombinerPattern::FMLSv2f64_OP2:
4194 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4195 RC = &AArch64::FPR128RegClass;
4196 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
4197 Opc = AArch64::FMLSv2i64_indexed;
4198 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4199 FMAInstKind::Indexed);
4200 } else {
4201 Opc = AArch64::FMLSv2f64;
4202 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4203 FMAInstKind::Accumulator);
4204 }
4205 break;
4206
4207 case MachineCombinerPattern::FMLSv4f32_OP2:
4208 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4209 RC = &AArch64::FPR128RegClass;
4210 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
4211 Opc = AArch64::FMLSv4i32_indexed;
4212 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4213 FMAInstKind::Indexed);
4214 } else {
4215 Opc = AArch64::FMLSv4f32;
4216 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4217 FMAInstKind::Accumulator);
4218 }
4219 break;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004220 } // end switch (Pattern)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004221 // Record MUL and ADD/SUB for deletion
4222 DelInstrs.push_back(MUL);
4223 DelInstrs.push_back(&Root);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004224}
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004225
4226/// \brief Replace csincr-branch sequence by simple conditional branch
4227///
4228/// Examples:
Joel Jonesaff09bf2017-07-06 14:17:36 +00004229/// 1. \code
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004230/// csinc w9, wzr, wzr, <condition code>
4231/// tbnz w9, #0, 0x44
Joel Jonesaff09bf2017-07-06 14:17:36 +00004232/// \endcode
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004233/// to
Joel Jonesaff09bf2017-07-06 14:17:36 +00004234/// \code
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004235/// b.<inverted condition code>
Joel Jonesaff09bf2017-07-06 14:17:36 +00004236/// \endcode
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004237///
Joel Jonesaff09bf2017-07-06 14:17:36 +00004238/// 2. \code
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004239/// csinc w9, wzr, wzr, <condition code>
4240/// tbz w9, #0, 0x44
Joel Jonesaff09bf2017-07-06 14:17:36 +00004241/// \endcode
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004242/// to
Joel Jonesaff09bf2017-07-06 14:17:36 +00004243/// \code
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004244/// b.<condition code>
Joel Jonesaff09bf2017-07-06 14:17:36 +00004245/// \endcode
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004246///
Chad Rosier4aeab5f2016-03-21 13:43:58 +00004247/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
4248/// compare's constant operand is power of 2.
Balaram Makame9b27252016-03-10 17:54:55 +00004249///
4250/// Examples:
Joel Jonesaff09bf2017-07-06 14:17:36 +00004251/// \code
Balaram Makame9b27252016-03-10 17:54:55 +00004252/// and w8, w8, #0x400
4253/// cbnz w8, L1
Joel Jonesaff09bf2017-07-06 14:17:36 +00004254/// \endcode
Balaram Makame9b27252016-03-10 17:54:55 +00004255/// to
Joel Jonesaff09bf2017-07-06 14:17:36 +00004256/// \code
Balaram Makame9b27252016-03-10 17:54:55 +00004257/// tbnz w8, #10, L1
Joel Jonesaff09bf2017-07-06 14:17:36 +00004258/// \endcode
Balaram Makame9b27252016-03-10 17:54:55 +00004259///
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004260/// \param MI Conditional Branch
4261/// \return True when the simple conditional branch is generated
4262///
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004263bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004264 bool IsNegativeBranch = false;
4265 bool IsTestAndBranch = false;
4266 unsigned TargetBBInMI = 0;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004267 switch (MI.getOpcode()) {
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004268 default:
4269 llvm_unreachable("Unknown branch instruction?");
4270 case AArch64::Bcc:
4271 return false;
4272 case AArch64::CBZW:
4273 case AArch64::CBZX:
4274 TargetBBInMI = 1;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004275 break;
4276 case AArch64::CBNZW:
4277 case AArch64::CBNZX:
4278 TargetBBInMI = 1;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004279 IsNegativeBranch = true;
4280 break;
4281 case AArch64::TBZW:
4282 case AArch64::TBZX:
4283 TargetBBInMI = 2;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004284 IsTestAndBranch = true;
4285 break;
4286 case AArch64::TBNZW:
4287 case AArch64::TBNZX:
4288 TargetBBInMI = 2;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004289 IsNegativeBranch = true;
4290 IsTestAndBranch = true;
4291 break;
4292 }
4293 // So we increment a zero register and test for bits other
4294 // than bit 0? Conservatively bail out in case the verifier
4295 // missed this case.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004296 if (IsTestAndBranch && MI.getOperand(1).getImm())
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004297 return false;
4298
4299 // Find Definition.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004300 assert(MI.getParent() && "Incomplete machine instruciton\n");
4301 MachineBasicBlock *MBB = MI.getParent();
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004302 MachineFunction *MF = MBB->getParent();
4303 MachineRegisterInfo *MRI = &MF->getRegInfo();
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004304 unsigned VReg = MI.getOperand(0).getReg();
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004305 if (!TargetRegisterInfo::isVirtualRegister(VReg))
4306 return false;
4307
4308 MachineInstr *DefMI = MRI->getVRegDef(VReg);
4309
Balaram Makame9b27252016-03-10 17:54:55 +00004310 // Look through COPY instructions to find definition.
4311 while (DefMI->isCopy()) {
4312 unsigned CopyVReg = DefMI->getOperand(1).getReg();
4313 if (!MRI->hasOneNonDBGUse(CopyVReg))
4314 return false;
4315 if (!MRI->hasOneDef(CopyVReg))
4316 return false;
4317 DefMI = MRI->getVRegDef(CopyVReg);
4318 }
4319
4320 switch (DefMI->getOpcode()) {
4321 default:
4322 return false;
4323 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
4324 case AArch64::ANDWri:
4325 case AArch64::ANDXri: {
4326 if (IsTestAndBranch)
4327 return false;
4328 if (DefMI->getParent() != MBB)
4329 return false;
4330 if (!MRI->hasOneNonDBGUse(VReg))
4331 return false;
4332
Quentin Colombetabe2d012016-04-25 20:54:08 +00004333 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
Balaram Makame9b27252016-03-10 17:54:55 +00004334 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
Quentin Colombetabe2d012016-04-25 20:54:08 +00004335 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
Balaram Makame9b27252016-03-10 17:54:55 +00004336 if (!isPowerOf2_64(Mask))
4337 return false;
4338
4339 MachineOperand &MO = DefMI->getOperand(1);
4340 unsigned NewReg = MO.getReg();
4341 if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4342 return false;
4343
4344 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4345
4346 MachineBasicBlock &RefToMBB = *MBB;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004347 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4348 DebugLoc DL = MI.getDebugLoc();
Balaram Makame9b27252016-03-10 17:54:55 +00004349 unsigned Imm = Log2_64(Mask);
Renato Golin179d1f52016-04-23 19:30:52 +00004350 unsigned Opc = (Imm < 32)
4351 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4352 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
Quentin Colombetabe2d012016-04-25 20:54:08 +00004353 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4354 .addReg(NewReg)
4355 .addImm(Imm)
4356 .addMBB(TBB);
Matthias Braune25bbd02016-05-03 04:54:16 +00004357 // Register lives on to the CBZ now.
4358 MO.setIsKill(false);
Quentin Colombetabe2d012016-04-25 20:54:08 +00004359
4360 // For immediate smaller than 32, we need to use the 32-bit
4361 // variant (W) in all cases. Indeed the 64-bit variant does not
4362 // allow to encode them.
4363 // Therefore, if the input register is 64-bit, we need to take the
4364 // 32-bit sub-part.
4365 if (!Is32Bit && Imm < 32)
4366 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004367 MI.eraseFromParent();
Balaram Makame9b27252016-03-10 17:54:55 +00004368 return true;
4369 }
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004370 // Look for CSINC
Balaram Makame9b27252016-03-10 17:54:55 +00004371 case AArch64::CSINCWr:
4372 case AArch64::CSINCXr: {
4373 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4374 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4375 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4376 DefMI->getOperand(2).getReg() == AArch64::XZR))
4377 return false;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004378
Balaram Makame9b27252016-03-10 17:54:55 +00004379 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4380 return false;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004381
Balaram Makame9b27252016-03-10 17:54:55 +00004382 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
Balaram Makame9b27252016-03-10 17:54:55 +00004383 // Convert only when the condition code is not modified between
4384 // the CSINC and the branch. The CC may be used by other
4385 // instructions in between.
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00004386 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
Balaram Makame9b27252016-03-10 17:54:55 +00004387 return false;
4388 MachineBasicBlock &RefToMBB = *MBB;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004389 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4390 DebugLoc DL = MI.getDebugLoc();
Balaram Makame9b27252016-03-10 17:54:55 +00004391 if (IsNegativeBranch)
4392 CC = AArch64CC::getInvertedCondCode(CC);
4393 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004394 MI.eraseFromParent();
Balaram Makame9b27252016-03-10 17:54:55 +00004395 return true;
4396 }
4397 }
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004398}
Alex Lorenzf3630112015-08-18 22:52:15 +00004399
4400std::pair<unsigned, unsigned>
4401AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4402 const unsigned Mask = AArch64II::MO_FRAGMENT;
4403 return std::make_pair(TF & Mask, TF & ~Mask);
4404}
4405
4406ArrayRef<std::pair<unsigned, const char *>>
4407AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4408 using namespace AArch64II;
Eugene Zelenko049b0172017-01-06 00:30:53 +00004409
Hal Finkel982e8d42015-08-30 08:07:29 +00004410 static const std::pair<unsigned, const char *> TargetFlags[] = {
Jessica Paquette809d7082017-07-28 03:21:58 +00004411 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4412 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
4413 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
Alex Lorenzf3630112015-08-18 22:52:15 +00004414 {MO_HI12, "aarch64-hi12"}};
4415 return makeArrayRef(TargetFlags);
4416}
4417
4418ArrayRef<std::pair<unsigned, const char *>>
4419AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4420 using namespace AArch64II;
Eugene Zelenko049b0172017-01-06 00:30:53 +00004421
Hal Finkel982e8d42015-08-30 08:07:29 +00004422 static const std::pair<unsigned, const char *> TargetFlags[] = {
Jessica Paquette809d7082017-07-28 03:21:58 +00004423 {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
Alex Lorenzf3630112015-08-18 22:52:15 +00004424 return makeArrayRef(TargetFlags);
4425}
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004426
Geoff Berry6748abe2017-07-13 02:28:54 +00004427ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4428AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4429 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
Geoff Berryb1e87142017-07-14 21:44:12 +00004430 {{MOSuppressPair, "aarch64-suppress-pair"},
4431 {MOStridedAccess, "aarch64-strided-access"}};
Geoff Berry6748abe2017-07-13 02:28:54 +00004432 return makeArrayRef(TargetFlags);
4433}
4434
Jessica Paquette809d7082017-07-28 03:21:58 +00004435size_t AArch64InstrInfo::getOutliningCallOverhead(
4436 MachineBasicBlock::iterator &StartIt,
4437 MachineBasicBlock::iterator &EndIt) const {
4438 // Is this a tail-call?
4439 if (EndIt->isTerminator())
4440 return 1; // Yes, so we don't need to save/restore LR.
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004441
Jessica Paquette809d7082017-07-28 03:21:58 +00004442 // No, so save + restore LR.
4443 return 3;
4444}
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004445
Jessica Paquette809d7082017-07-28 03:21:58 +00004446size_t AArch64InstrInfo::getOutliningFrameOverhead(
4447 MachineBasicBlock::iterator &StartIt,
4448 MachineBasicBlock::iterator &EndIt) const {
4449
4450 // Is this a tail-call?
4451 if (EndIt->isTerminator())
4452 return 0; // Yes, so we already have a return.
4453
4454 // No, so we have to add a return to the end.
4455 return 1;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004456}
4457
4458bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
Jessica Paquetteeac86332017-03-24 23:00:21 +00004459 return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004460}
4461
4462AArch64GenInstrInfo::MachineOutlinerInstrType
4463AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
4464
4465 MachineFunction *MF = MI.getParent()->getParent();
4466 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
4467
4468 // Don't outline LOHs.
4469 if (FuncInfo->getLOHRelated().count(&MI))
4470 return MachineOutlinerInstrType::Illegal;
4471
4472 // Don't allow debug values to impact outlining type.
4473 if (MI.isDebugValue() || MI.isIndirectDebugValue())
4474 return MachineOutlinerInstrType::Invisible;
4475
4476 // Is this a terminator for a basic block?
4477 if (MI.isTerminator()) {
4478
4479 // Is this the end of a function?
4480 if (MI.getParent()->succ_empty())
Jessica Paquette809d7082017-07-28 03:21:58 +00004481 return MachineOutlinerInstrType::Legal;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004482
4483 // It's not, so don't outline it.
4484 return MachineOutlinerInstrType::Illegal;
4485 }
4486
4487 // Don't outline positions.
4488 if (MI.isPosition())
4489 return MachineOutlinerInstrType::Illegal;
4490
4491 // Make sure none of the operands are un-outlinable.
4492 for (const MachineOperand &MOP : MI.operands())
4493 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
4494 MOP.isTargetIndex())
4495 return MachineOutlinerInstrType::Illegal;
4496
4497 // Don't outline anything that uses the link register.
4498 if (MI.modifiesRegister(AArch64::LR, &RI) ||
4499 MI.readsRegister(AArch64::LR, &RI))
Jessica Paquette809d7082017-07-28 03:21:58 +00004500 return MachineOutlinerInstrType::Illegal;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004501
4502 // Does this use the stack?
4503 if (MI.modifiesRegister(AArch64::SP, &RI) ||
4504 MI.readsRegister(AArch64::SP, &RI)) {
4505
4506 // Is it a memory operation?
4507 if (MI.mayLoadOrStore()) {
Jessica Paquette809d7082017-07-28 03:21:58 +00004508 unsigned Base; // Filled with the base regiser of MI.
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004509 int64_t Offset; // Filled with the offset of MI.
4510 unsigned DummyWidth;
4511
4512 // Does it allow us to offset the base register and is the base SP?
4513 if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
Jessica Paquette809d7082017-07-28 03:21:58 +00004514 Base != AArch64::SP)
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004515 return MachineOutlinerInstrType::Illegal;
4516
4517 // Find the minimum/maximum offset for this instruction and check if
4518 // fixing it up would be in range.
4519 int64_t MinOffset, MaxOffset;
4520 unsigned DummyScale;
4521 getMemOpInfo(MI.getOpcode(), DummyScale, DummyWidth, MinOffset,
4522 MaxOffset);
4523
4524 // TODO: We should really test what happens if an instruction overflows.
4525 // This is tricky to test with IR tests, but when the outliner is moved
4526 // to a MIR test, it really ought to be checked.
Jessica Paquette5d59a4e2017-03-20 15:51:45 +00004527 if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset)
Jessica Paquette809d7082017-07-28 03:21:58 +00004528 return MachineOutlinerInstrType::Illegal;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004529
4530 // It's in range, so we can outline it.
4531 return MachineOutlinerInstrType::Legal;
4532 }
4533
4534 // We can't fix it up, so don't outline it.
4535 return MachineOutlinerInstrType::Illegal;
4536 }
4537
4538 return MachineOutlinerInstrType::Legal;
4539}
4540
4541void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
4542 for (MachineInstr &MI : MBB) {
4543 unsigned Base, Width;
4544 int64_t Offset;
4545
4546 // Is this a load or store with an immediate offset with SP as the base?
4547 if (!MI.mayLoadOrStore() ||
4548 !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
4549 Base != AArch64::SP)
4550 continue;
4551
4552 // It is, so we have to fix it up.
4553 unsigned Scale;
4554 int64_t Dummy1, Dummy2;
4555
4556 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
4557 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
4558 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
4559 assert(Scale != 0 && "Unexpected opcode!");
4560
4561 // We've pushed the return address to the stack, so add 16 to the offset.
4562 // This is safe, since we already checked if it would overflow when we
4563 // checked if this instruction was legal to outline.
Jessica Paquette809d7082017-07-28 03:21:58 +00004564 int64_t NewImm = (Offset + 16) / Scale;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004565 StackOffsetOperand.setImm(NewImm);
4566 }
4567}
4568
4569void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
4570 MachineFunction &MF,
4571 bool IsTailCall) const {
4572
4573 // If this is a tail call outlined function, then there's already a return.
4574 if (IsTailCall)
4575 return;
4576
4577 // It's not a tail call, so we have to insert the return ourselves.
4578 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
4579 .addReg(AArch64::LR, RegState::Undef);
4580 MBB.insert(MBB.end(), ret);
4581
4582 // Walk over the basic block and fix up all the stack accesses.
4583 fixupPostOutline(MBB);
4584}
4585
// Intentionally empty: on AArch64 the outlined function needs no prologue.
// LR is saved/restored at the call site (insertOutlinedCall) and the return
// is appended by insertOutlinerEpilogue.
void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
                                              MachineFunction &MF,
                                              bool IsTailCall) const {}
4589
4590MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
4591 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
4592 MachineFunction &MF, bool IsTailCall) const {
4593
4594 // Are we tail calling?
4595 if (IsTailCall) {
4596 // If yes, then we can just branch to the label.
4597 It = MBB.insert(It,
4598 BuildMI(MF, DebugLoc(), get(AArch64::B))
4599 .addGlobalAddress(M.getNamedValue(MF.getName())));
4600 return It;
4601 }
4602
4603 // We're not tail calling, so we have to save LR before the call and restore
4604 // it after.
4605 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
4606 .addReg(AArch64::SP, RegState::Define)
4607 .addReg(AArch64::LR)
4608 .addReg(AArch64::SP)
4609 .addImm(-16);
4610 It = MBB.insert(It, STRXpre);
4611 It++;
4612
4613 // Insert the call.
4614 It = MBB.insert(It,
4615 BuildMI(MF, DebugLoc(), get(AArch64::BL))
4616 .addGlobalAddress(M.getNamedValue(MF.getName())));
4617
4618 It++;
4619
4620 // Restore the link register.
4621 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
4622 .addReg(AArch64::SP, RegState::Define)
4623 .addReg(AArch64::LR)
4624 .addReg(AArch64::SP)
4625 .addImm(16);
4626 It = MBB.insert(It, LDRXpost);
4627
4628 return It;
4629}