//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

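// A target-specific MachineMemOperand flag, presumably used to mark a
// load/store so that it is not combined with a neighbouring access into a
// load/store pair (the name and the MOTargetFlag1 slot suggest this; the
// pairing logic lives elsewhere in this file).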
static const MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;

static cl::opt<unsigned>
TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
                    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
                    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                    cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// getInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  // before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    NumBytes = 16;
    break;
  }

  return NumBytes;
}

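// parseCondBranch, reverseBranchCondition and instantiateCondBranch share an
// encoding of the branch condition in Cond:
//   Bcc:        { condition code }
//   CB[N]Z[WX]: { -1, opcode, source register }
//   TB[N]Z[WX]: { -1, opcode, source register, bit number }
// The leading -1 distinguishes the folded compare-and-branch forms from a
// plain Bcc.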
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
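  // An unconditional B is effectively unrestricted: with 64 displacement bits
  // the isIntN check in isBranchOffsetInRange below always succeeds.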
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}

MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
    const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use add() instead of addReg() to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
                                        const DebugLoc &DL,
                                        int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
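/// For example, 0x0000ffff is encodable as a logical immediate, so
/// "MOVi32imm 0xffff" can become "ORRWri WZR, 0xffff", while 0x12345 has no
/// logical-immediate encoding and cannot.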
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  unsigned Imm;

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
            MI.getOperand(3).getImm() == 0);

  // add/sub on register with shift
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getArithShiftValue(Imm) < 4);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // logical ops on register with shift
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getShiftValue(Imm) < 4 &&
            AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
  // feature.
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

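/// Return true if MI's shifted-register operand uses an LSL with a shift
/// amount of at most 3; such shifts are presumed free on Falkor.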
bool AArch64InstrInfo::isFalkorLSLFast(const MachineInstr &MI) const {
  if (MI.getNumOperands() < 4)
    return false;
  unsigned ShOpVal = MI.getOperand(3).getImm();
  unsigned ShImm = AArch64_AM::getShiftValue(ShOpVal);
  if (AArch64_AM::getShiftType(ShOpVal) == AArch64_AM::LSL && ShImm < 4)
    return true;
  return false;
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
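  // For example, with a common base register, "ldr x0, [x2, #8]" (width 8)
  // and "str x1, [x2, #16]" are disjoint since 8 + 8 <= 16.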
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
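/// For example, "SUBSWri %w1, 42" yields SrcReg = %w1, SrcReg2 = 0,
/// CmpMask = ~0 and CmpValue = 1 (any non-zero immediate is canonicalized to
/// 1; see the FIXMEs below).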
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: CmpValue is canonicalized here to 0 or 1; the original
    // immediate is lost.
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost. In fact this caused a bug
    // in spec2006-483.xalancbmk. CmpValue is only used to compare with zero
    // in optimizeCompareInstr.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible to do the actual
/// substitution and legality checking.
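/// For example, ADDSWri is rewritten to ADDWri only when it does not define
/// WZR: in the non-flag-setting form, a zero-register destination would be
/// re-encoded as the stack pointer.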
static unsigned convertFlagSettingOpcode(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

enum AccessKind {
  AK_Write = 0x01,
  AK_Read = 0x10,
  AK_All = 0x11
};

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare
/// instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction. Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare
  // function.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr: return AArch64::ADDSWrr;
  case AArch64::ADDWri: return AArch64::ADDSWri;
  case AArch64::ADDXrr: return AArch64::ADDSXrr;
  case AArch64::ADDXri: return AArch64::ADDSXri;
  case AArch64::ADCWr:  return AArch64::ADCSWr;
  case AArch64::ADCXr:  return AArch64::ADCSXr;
  case AArch64::SUBWrr: return AArch64::SUBSWrr;
  case AArch64::SUBWri: return AArch64::SUBSWri;
  case AArch64::SUBXrr: return AArch64::SUBSXrr;
  case AArch64::SUBXri: return AArch64::SUBSXri;
  case AArch64::SBCWr:  return AArch64::SBCSWr;
  case AArch64::SBCXr:  return AArch64::SBCSXr;
  case AArch64::ANDWri: return AArch64::ANDSWri;
  case AArch64::ANDXri: return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
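    LLVM_FALLTHROUGH; // HI/LS also use C; fall through to set it.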
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
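    LLVM_FALLTHROUGH; // GT/LE also use N and V; fall through to set them.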
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr,
///        or if MI opcode is not the S form there must be neither defs of
///        flags nor uses of flags between MI and CmpInstr.
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
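/// For example, given
///   %1 = SUBWri %0, 1, 0
///   %2 = SUBSWri %1, 0, 0   ; %2 unused, only NZCV is needed
/// the SUBWri is rewritten to SUBSWri and the explicit compare is erased,
/// subject to the conditions checked in canInstrSubstituteCmpInstr.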
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

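// Expand the LOAD_STACK_GUARD pseudo: materialize the address of the stack
// guard global and load from it, either through the GOT, through a MOVZ/MOVK
// sequence in the large code model, or with ADRP plus a page-offset LDR
// otherwise.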
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001292bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1293 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001294 return false;
1295
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001296 MachineBasicBlock &MBB = *MI.getParent();
1297 DebugLoc DL = MI.getDebugLoc();
1298 unsigned Reg = MI.getOperand(0).getReg();
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001299 const GlobalValue *GV =
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001300 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001301 const TargetMachine &TM = MBB.getParent()->getTarget();
1302 unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1303 const unsigned char MO_NC = AArch64II::MO_NC;
1304
1305 if ((OpFlags & AArch64II::MO_GOT) != 0) {
1306 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1307 .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
1308 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001309 .addReg(Reg, RegState::Kill)
1310 .addImm(0)
1311 .addMemOperand(*MI.memoperands_begin());
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001312 } else if (TM.getCodeModel() == CodeModel::Large) {
1313 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
Evandro Menezes7960b2e2017-01-18 18:57:08 +00001314 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001315 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1316 .addReg(Reg, RegState::Kill)
1317 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
1318 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1319 .addReg(Reg, RegState::Kill)
Evandro Menezes7960b2e2017-01-18 18:57:08 +00001320 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
1321 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1322 .addReg(Reg, RegState::Kill)
1323 .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001324 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001325 .addReg(Reg, RegState::Kill)
1326 .addImm(0)
1327 .addMemOperand(*MI.memoperands_begin());
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001328 } else {
1329 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1330 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1331 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1332 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1333 .addReg(Reg, RegState::Kill)
1334 .addGlobalAddress(GV, 0, LoFlags)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001335 .addMemOperand(*MI.memoperands_begin());
Akira Hatanakae5b6e0d2014-07-25 19:31:34 +00001336 }
1337
1338 MBB.erase(MI);
1339
1340 return true;
1341}
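
// For reference, on the small code model the expansion above turns
// LOAD_STACK_GUARD roughly into
//
//   adrp xN, __stack_chk_guard
//   ldr  xN, [xN, :lo12:__stack_chk_guard]
//
// (the exact guard symbol depends on the target), while the GOT path first
// loads the guard's address from the GOT and the large code model
// materializes the full 64-bit address with MOVZ/MOVK before the final load.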

/// Return true if this instruction has a shifted register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction has an extended register operand with a
/// non-zero extend/shift immediate.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    unsigned Val = MI.getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  return false;
}
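
// For example, "ldr x0, [x1, w2, sxtw #3]" both extends and scales its
// register offset, so isScaledAddr returns true for it, while
// "ldr x0, [x1, x2]" (UXTX extend, no shift) is a plain reg+reg address and
// returns false.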

/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}
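
// Setting the flag on only the first MachineMemOperand is sufficient because
// isLdStPairSuppressed above scans all memory operands for MOSuppressPair.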

bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}

bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
  return isUnscaledLdSt(MI.getOpcode());
}

// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm (as opposed to an address reloc).
  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
  if (!MI.getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  unsigned BaseReg = MI.getOperand(1).getReg();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI.modifiesRegister(BaseReg, TRI))
    return false;

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.isPaired128Slow()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    const TargetRegisterInfo *TRI) const {
  unsigned Width;
  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Get the scaling factor for the instruction and set the width for the
  // instruction.
  unsigned Scale = 0;
  int64_t Dummy1, Dummy2;

  // If this returns false, then it's an instruction we don't want to handle.
  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    return false;

  // Compute the offset. Offset is calculated as the immediate operand
  // multiplied by the scaling factor. Unscaled instructions have scaling
  // factor set to 1.
  if (LdSt.getNumExplicitOperands() == 3) {
    BaseReg = LdSt.getOperand(1).getReg();
    Offset = LdSt.getOperand(2).getImm() * Scale;
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseReg = LdSt.getOperand(2).getReg();
    Offset = LdSt.getOperand(3).getImm() * Scale;
  }
  return true;
}
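
// Worked example: for "ldp x1, x2, [x0, #16]" (LDPXi with immediate operand
// 2), getMemOpInfo reports Scale = 8 and Width = 16, so the function above
// returns BaseReg = x0 and Offset = 2 * 8 = 16 bytes.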

MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
  return OfsOp;
}

bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
                                    unsigned &Width, int64_t &MinOffset,
                                    int64_t &MaxOffset) const {
  switch (Opcode) {
  // Not a memory operation or something we want to handle.
  default:
    Scale = Width = 0;
    MinOffset = MaxOffset = 0;
    return false;
  case AArch64::STRWpost:
  case AArch64::LDRWpost:
    Width = 32;
    Scale = 4;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    Scale = 16;
    Width = 32;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    Scale = 8;
    Width = 16;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    Scale = Width = 8;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    Scale = 4;
    Width = 8;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = Width = 4;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  }

  return true;
}
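
// The (Scale, MinOffset, MaxOffset) triples above describe the encodable
// immediate ranges. For example, LDRXui has Scale = 8 with an immediate range
// of [0, 4095], so it can address byte offsets 0 to 32760 in steps of 8,
// while its unscaled counterpart LDURXi has Scale = 1 and covers byte offsets
// -256 to 255 at any alignment.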

// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
  unsigned OffsetStride = 1;
  switch (Opc) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    OffsetStride = 16;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    OffsetStride = 8;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    OffsetStride = 4;
    break;
  }
  // If the byte-offset isn't a multiple of the stride, we can't scale this
  // offset.
  if (Offset % OffsetStride != 0)
    return false;

  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  Offset /= OffsetStride;
  return true;
}
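
// For example, an unscaled "ldur x1, [x0, #16]" has byte offset 16; with
// OffsetStride = 8 this scales to element offset 2, the unit used by the
// paired instructions. A byte offset of 12 fails the stride check, so such a
// load cannot be scaled for pairing.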

static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
  if (FirstOpc == SecondOpc)
    return true;
  // We can also pair sign-ext and zero-ext instructions.
  switch (FirstOpc) {
  default:
    return false;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
  }
  // These instructions can't be paired based on their opcodes.
  return false;
}

/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
                                           MachineInstr &SecondLdSt,
                                           unsigned NumLoads) const {
  // Only cluster up to a single pair.
  if (NumLoads > 1)
    return false;

  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
    return false;

  // Can we pair these instructions based on their opcodes?
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  if (!canPairLdStOpc(FirstOpc, SecondOpc))
    return false;

  // Can't merge volatiles or load/stores that have a hint to avoid pair
  // formation, for example.
  if (!isCandidateToMergeOrPair(FirstLdSt) ||
      !isCandidateToMergeOrPair(SecondLdSt))
    return false;

  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    return false;

  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    return false;

  // Pairwise instructions have a 7-bit signed offset field.
  if (Offset1 > 63 || Offset1 < -64)
    return false;

  // The caller should already have ordered First/SecondLdSt by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  return Offset1 + 1 == Offset2;
}
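
// Example: "ldr x1, [x0, #8]" (imm 1) followed by "ldr x2, [x0, #16]" (imm 2)
// passes the Offset1 + 1 == Offset2 check above, so the scheduler may cluster
// the two loads and a later pass can rewrite them as "ldp x1, x2, [x0, #8]".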

MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
    MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
    const MDNode *Expr, const DebugLoc &DL) const {
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
                                .addFrameIndex(FrameIx)
                                .addImm(0)
                                .addImm(Offset)
                                .addMetadata(Var)
                                .addMetadata(Expr);
  return &*MIB;
}

static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here, that happens to be
  // easily obtainable with a mask.
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}
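
// Example: copying the tuple Q1_Q2 into Q2_Q3 sub-register by sub-register in
// the forward direction would overwrite Q2 before it is read as a source;
// here (2 - 1) & 0x1f == 1 < 2, so the helper returns true and
// copyPhysRegTuple below copies the sub-registers in reverse order instead.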

void AArch64InstrInfo::copyPhysRegTuple(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
    unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
    ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}

void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, unsigned DestReg,
                                   unsigned SrcReg, bool KillSrc) const {
  if (AArch64::GPR32spRegClass.contains(DestReg) &&
      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    const TargetRegisterInfo *TRI = &getRegisterInfo();

    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
      // If either operand is WSP, expand to ADD #0.
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(
            DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(
            SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
            .addReg(SrcRegX, RegState::Undef)
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc))
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
      }
    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(
            DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(
            SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
            .addReg(AArch64::XZR)
            .addReg(SrcRegX, RegState::Undef)
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        // Otherwise, expand to ORR WZR.
        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
            .addReg(AArch64::WZR)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }
    }
    return;
  }

  if (AArch64::GPR64spRegClass.contains(DestReg) &&
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
      // If either operand is SP, expand to ADD #0.
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      // Otherwise, expand to ORR XZR.
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
          .addReg(AArch64::XZR)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copy a DDDD register quad by copying the individual sub-registers.
  if (AArch64::DDDDRegClass.contains(DestReg) &&
      AArch64::DDDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                       AArch64::dsub2, AArch64::dsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DDD register triple by copying the individual sub-registers.
  if (AArch64::DDDRegClass.contains(DestReg) &&
      AArch64::DDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                       AArch64::dsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DD register pair by copying the individual sub-registers.
  if (AArch64::DDRegClass.contains(DestReg) &&
      AArch64::DDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a QQQQ register quad by copying the individual sub-registers.
  if (AArch64::QQQQRegClass.contains(DestReg) &&
      AArch64::QQQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                       AArch64::qsub2, AArch64::qsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQQ register triple by copying the individual sub-registers.
  if (AArch64::QQQRegClass.contains(DestReg) &&
      AArch64::QQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                       AArch64::qsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }

  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copies between GPR64 and FPR64.
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::GPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  // Copies between GPR32 and FPR32.
  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::GPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

  llvm_unreachable("unimplemented reg-to-reg copy");
}

void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}
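
// The register-tuple spills above are emitted as ST1 instructions, which have
// no immediate-offset addressing form; that is why Offset is cleared for them
// and no "#0" immediate is appended to the store.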

void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned DestReg, unsigned SrcReg, int Offset,
                           const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV) {
  if (DestReg == SrcReg && Offset == 0)
    return;

  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
         "SP increment/decrement not 16-byte aligned");

  bool isSub = Offset < 0;
  if (isSub)
    Offset = -Offset;

  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
  // register can be loaded with offset%8 and the add/sub can use an extending
  // instruction with LSL#3.
  // Currently the function handles any offsets but generates a poor sequence
  // of code.
  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  unsigned Opc;
  if (SetNZCV)
    Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
  else
    Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
  const unsigned MaxEncoding = 0xfff;
  const unsigned ShiftSize = 12;
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  while (((unsigned)Offset) >= (1 << ShiftSize)) {
    unsigned ThisVal;
    if (((unsigned)Offset) > MaxEncodableValue) {
      ThisVal = MaxEncodableValue;
    } else {
      ThisVal = Offset & MaxEncodableValue;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");
    BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
        .addReg(SrcReg)
        .addImm(ThisVal >> ShiftSize)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
        .setMIFlag(Flag);

    SrcReg = DestReg;
    Offset -= ThisVal;
    if (Offset == 0)
      return;
  }
  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
      .addReg(SrcReg)
      .addImm(Offset)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlag(Flag);
}
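
// Example decomposition for Offset = 0x12345 (no NZCV update):
//
//   add xD, xS, #0x12, lsl #12   // peel off 0x12000
//   add xD, xD, #0x345           // add the remaining 0x345
//
// ADD/SUB immediates encode 12 bits, optionally shifted left by 12, so larger
// offsets are peeled off up to 0xfff000 at a time by the loop above.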

MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex,
    LiveIntervals *LIS) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %vreg0<def> = COPY %SP; GPR64all:%vreg0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %vreg0 register class here.
  //
  // <rdar://problem/11522048>
  //
  if (MI.isFullCopy()) {
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned SrcReg = MI.getOperand(1).getReg();
    if (SrcReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Handle the case where a copy is being spilled or filled but the source
  // and destination register classes don't match. For example:
  //
  //   %vreg0<def> = COPY %XZR; GPR64common:%vreg0
  //
  // In this case we can still safely fold away the COPY and generate the
  // following spill code:
  //
  //   STRXui %XZR, <fi#0>
  //
  // This also eliminates spilled cross-register-class COPYs (e.g. between x
  // and d regs) of the same size. For example:
  //
  //   %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
  //
  // will be filled as
  //
  //   LDRDui %vreg0, fi<#0>
  //
  // instead of
  //
  //   LDRXui %vregTemp, fi<#0>
  //   %vreg0 = FMOV %vregTemp
  //
  if (MI.isCopy() && Ops.size() == 1 &&
      // Make sure we're only folding the explicit COPY defs/uses.
      (Ops[0] == 0 || Ops[0] == 1)) {
    bool IsSpill = Ops[0] == 0;
    bool IsFill = !IsSpill;
    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    MachineBasicBlock &MBB = *MI.getParent();
    const MachineOperand &DstMO = MI.getOperand(0);
    const MachineOperand &SrcMO = MI.getOperand(1);
    unsigned DstReg = DstMO.getReg();
    unsigned SrcReg = SrcMO.getReg();
    // This is slightly expensive to compute for physical regs since
    // getMinimalPhysRegClass is slow.
    auto getRegClass = [&](unsigned Reg) {
      return TargetRegisterInfo::isVirtualRegister(Reg)
                 ? MRI.getRegClass(Reg)
                 : TRI.getMinimalPhysRegClass(Reg);
    };

    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
      assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
             "Mismatched register size in non subreg COPY");
      if (IsSpill)
        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
                            getRegClass(SrcReg), &TRI);
      else
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
                             getRegClass(DstReg), &TRI);
      return &*--InsertPt;
    }

    // Handle cases like spilling def of:
    //
    //   %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
    //
    // where the physical register source can be widened and stored to the full
    // virtual reg destination stack slot, in this case producing:
    //
    //   STRXui %XZR, <fi#0>
    //
    if (IsSpill && DstMO.isUndef() &&
        TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
      assert(SrcMO.getSubReg() == 0 &&
             "Unexpected subreg on physical register");
      const TargetRegisterClass *SpillRC;
      unsigned SpillSubreg;
      switch (DstMO.getSubReg()) {
      default:
        SpillRC = nullptr;
        break;
      case AArch64::sub_32:
      case AArch64::ssub:
        if (AArch64::GPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::GPR64RegClass;
          SpillSubreg = AArch64::sub_32;
        } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR64RegClass;
          SpillSubreg = AArch64::ssub;
        } else
          SpillRC = nullptr;
        break;
      case AArch64::dsub:
        if (AArch64::FPR64RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR128RegClass;
          SpillSubreg = AArch64::dsub;
        } else
          SpillRC = nullptr;
        break;
      }

      if (SpillRC)
        if (unsigned WidenedSrcReg =
                TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
          storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
                              FrameIndex, SpillRC, &TRI);
          return &*--InsertPt;
        }
    }

    // Handle cases like filling use of:
    //
    //   %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
    //
    // where we can load the full virtual reg source stack slot into the subreg
    // destination, in this case producing:
    //
    //   LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
    //
    if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
      const TargetRegisterClass *FillRC;
      switch (DstMO.getSubReg()) {
      default:
        FillRC = nullptr;
        break;
      case AArch64::sub_32:
        FillRC = &AArch64::GPR32RegClass;
        break;
      case AArch64::ssub:
        FillRC = &AArch64::FPR32RegClass;
        break;
      case AArch64::dsub:
        FillRC = &AArch64::FPR64RegClass;
        break;
      }

      if (FillRC) {
        assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
                   TRI.getRegSizeInBits(*FillRC) &&
               "Mismatched regclass size on folded subreg COPY");
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
        MachineInstr &LoadMI = *--InsertPt;
        MachineOperand &LoadDst = LoadMI.getOperand(0);
        assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
        LoadDst.setSubReg(DstMO.getSubReg());
        LoadDst.setIsUndef();
        return &LoadMI;
      }
    }
  }

  // Cannot fold.
  return nullptr;
}

int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int *EmittableOffset) {
  int Scale = 1;
  bool IsSigned = false;
  // The ImmIdx should be changed case by case if it is not 2.
  unsigned ImmIdx = 2;
  unsigned UnscaledOp = 0;
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
  // Vector spills/fills can't take an immediate offset.
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
    return AArch64FrameOffsetCannotUpdate;
  case AArch64::PRFMui:
    Scale = 8;
    UnscaledOp = AArch64::PRFUMi;
    break;
  case AArch64::LDRXui:
    Scale = 8;
    UnscaledOp = AArch64::LDURXi;
    break;
  case AArch64::LDRWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURWi;
    break;
  case AArch64::LDRBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBi;
    break;
  case AArch64::LDRHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHi;
    break;
  case AArch64::LDRSui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSi;
    break;
  case AArch64::LDRDui:
    Scale = 8;
    UnscaledOp = AArch64::LDURDi;
    break;
  case AArch64::LDRQui:
    Scale = 16;
    UnscaledOp = AArch64::LDURQi;
    break;
  case AArch64::LDRBBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBBi;
    break;
  case AArch64::LDRHHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHHi;
    break;
  case AArch64::LDRSBXui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBXi;
    break;
  case AArch64::LDRSBWui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBWi;
    break;
  case AArch64::LDRSHXui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHXi;
    break;
  case AArch64::LDRSHWui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHWi;
    break;
  case AArch64::LDRSWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSWi;
    break;

  case AArch64::STRXui:
    Scale = 8;
    UnscaledOp = AArch64::STURXi;
    break;
  case AArch64::STRWui:
    Scale = 4;
    UnscaledOp = AArch64::STURWi;
    break;
  case AArch64::STRBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBi;
    break;
  case AArch64::STRHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHi;
    break;
  case AArch64::STRSui:
    Scale = 4;
    UnscaledOp = AArch64::STURSi;
    break;
  case AArch64::STRDui:
    Scale = 8;
    UnscaledOp = AArch64::STURDi;
    break;
  case AArch64::STRQui:
    Scale = 16;
    UnscaledOp = AArch64::STURQi;
    break;
  case AArch64::STRBBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBBi;
    break;
  case AArch64::STRHHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHHi;
    break;

  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 8;
    break;
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::LDNPQi:
  case AArch64::STNPQi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 16;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 4;
    break;

  case AArch64::LDURXi:
  case AArch64::LDURWi:
  case AArch64::LDURBi:
  case AArch64::LDURHi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSWi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::STURBi:
  case AArch64::STURHi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
    Scale = 1;
    break;
  }

  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset (and have an unscaled op to use).
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Use an unscaled addressing mode if the instruction has a negative offset
  // (or if the instruction is already using an unscaled addressing mode).
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }
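  // Worked example (a sketch, not from a real compilation): for LDRXui,
  // Scale is 8, so a byte offset of 32 becomes the scaled immediate 4, which
  // fits the unsigned 12-bit field and is emittable as-is. A byte offset of
  // 12 is not a multiple of 8, so useUnscaledOp is set above and the access
  // is rewritten to LDURXi, whose signed 9-bit immediate holds the unscaled
  // offset 12 directly.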

  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}

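// Rewrite a frame-index operand into FrameReg plus an immediate. A hedged
// MIR-like sketch of the effect (register names are illustrative, not from a
// real compilation):
//
//   %x0 = LDRXui <fi#2>, 1        ; scaled offset 1, i.e. 8 bytes
//
// with fi#2 resolved to [FrameReg, #16] becomes
//
//   %x0 = LDRXui %FrameReg, 3     ; (16 + 8) / 8
//
// If the combined offset cannot be encoded in the scaled form, the opcode is
// switched to its unscaled variant (e.g. LDURXi), or the residue is left in
// Offset for the caller to materialize separately.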
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}

void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::createImm(0));
}

// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }

// True when Opc sets the NZCV flags.
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// FP Opcodes that can be combined with an FMUL.
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    return (Options.UnsafeFPMath ||
            Options.AllowFPOpFusion == FPOpFusion::Fast);
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

//
// Utility routine that checks whether \param MO is defined by a
// \param CombineOpc instruction in the basic block \param MBB.
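// For example (a MIR-like sketch, not from a real compilation), with
// CombineOpc = MADDWrrr and ZeroReg = WZR, an operand defined by
//   %2 = MADDWrrr %0, %1, %wzr    ; a plain 32-bit multiply
// in the same block is accepted, provided %2 has a single non-debug use.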
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // It must only be used by the instruction we are combining with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  case AArch64::FADDDrr:
  case AArch64::FADDSrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FMULDrr:
  case AArch64::FMULSrr:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv2f64:
  case AArch64::FMULXv4f32:
  case AArch64::FMULv2f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}

/// Find instructions that can be turned into madd.
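/// For example (a MIR-like sketch, not from a real compilation):
///   %2 = MADDWrrr %0, %1, %wzr   ; plain 32-bit multiply
///   %4 = ADDWrr %2, %3
/// is recorded as MULADDW_OP1, to be rewritten later as
///   %4 = MADDWrrr %0, %1, %3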
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // Bail out when NZCV is live.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertFlagSettingOpcode(Root);
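    // For example, an ADDSWrr whose NZCV def is dead can be treated as a
    // plain ADDWrr for the purpose of the multiply-accumulate patterns below.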
    // Bail out when the opcode cannot be converted.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}
/// Floating-Point Support

/// Find instructions that can be turned into a fused multiply-add or
/// multiply-subtract.
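/// For example (a MIR-like sketch, not from a real compilation):
///   %2 = FMULSrr %0, %1
///   %4 = FADDSrr %2, %3
/// is recorded as FMULADDS_OP1 and later rewritten as
///   %4 = FMADDSrrr %0, %1, %3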
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
      Found = true;
    }
    break;

  case AArch64::FSUBSrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
      Found = true;
    }
    break;
  }
  return Found;
}

/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
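/// For example, FMLAv2f32_OP2 folds an FMULv2f32 feeding an FADDv2f32 into a
/// single FMLAv2f32; in a loop the shorter sequence can improve throughput
/// even when the fused form has comparable latency.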
bool
AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Patterns vector. Patterns should be sorted in priority order since
/// the pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}
3543
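// Kinds of fused multiply instruction that genFusedMultiply below can emit:
// Default is the scalar three-source form (e.g. FMADDSrrr), Indexed the
// lane-indexed vector form (e.g. FMLAv2i32_indexed, which carries an extra
// lane immediate), and Accumulator the plain vector form (e.g. FMLAv2f32).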
enum class FMAInstKind { Default, Indexed, Accumulator };
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B,0
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();
  unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
  bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind \n");
  // Insert the MADD (MADD, FMADD, FMSUB, FMLA, FMLS).
  InsInstrs.push_back(MIB);
  return MUL;
}

/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc,
                              unsigned VR, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB =
      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
          .addReg(SrcReg0, getKillRegState(Src0IsKill))
          .addReg(SrcReg1, getKillRegState(Src1IsKill))
          .addReg(VR);
  // Insert the MADD.
  InsInstrs.push_back(MIB);
  return MUL;
}

/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence.
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I, Imm
    // ==> ORR V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(-Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  // Floating Point Support
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDD_OP1:
    // FMUL I=A,B,0
    // FADD R,I,C
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULADDD_OP2:
    // FMUL I=A,B,0
    // FADD R,C,I
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

3983 case MachineCombinerPattern::FMULSUBS_OP1:
3984 case MachineCombinerPattern::FMULSUBD_OP1: {
    // FMUL I=A,B
3986 // FSUB R,I,C
3987 // ==> FNMSUB R,A,B,C // = -C + A*B
3988 // --- Create(FNMSUB);
3989 if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
3990 Opc = AArch64::FNMSUBSrrr;
3991 RC = &AArch64::FPR32RegClass;
3992 } else {
3993 Opc = AArch64::FNMSUBDrrr;
3994 RC = &AArch64::FPR64RegClass;
3995 }
3996 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3997 break;
3998 }
3999 case MachineCombinerPattern::FMULSUBS_OP2:
4000 case MachineCombinerPattern::FMULSUBD_OP2: {
    // FMUL I=A,B
4002 // FSUB R,C,I
4003 // ==> FMSUB R,A,B,C (computes C - A*B)
4004 // --- Create(FMSUB);
4005 if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4006 Opc = AArch64::FMSUBSrrr;
4007 RC = &AArch64::FPR32RegClass;
4008 } else {
4009 Opc = AArch64::FMSUBDrrr;
4010 RC = &AArch64::FPR64RegClass;
4011 }
4012 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  }
4014
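  // Only _OP2 shapes are handled for the vector FMLS patterns below: vector
  // FMLS subtracts the product from the accumulator, so an OP1 shape
  // (product minus accumulator) has no single instruction to fuse into.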
4015 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4016 Opc = AArch64::FMLSv1i32_indexed;
4017 RC = &AArch64::FPR32RegClass;
4018 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4019 FMAInstKind::Indexed);
4020 break;
4021
4022 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4023 Opc = AArch64::FMLSv1i64_indexed;
4024 RC = &AArch64::FPR64RegClass;
4025 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4026 FMAInstKind::Indexed);
4027 break;
4028
4029 case MachineCombinerPattern::FMLSv2f32_OP2:
4030 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4031 RC = &AArch64::FPR64RegClass;
4032 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
4033 Opc = AArch64::FMLSv2i32_indexed;
4034 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4035 FMAInstKind::Indexed);
4036 } else {
4037 Opc = AArch64::FMLSv2f32;
4038 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4039 FMAInstKind::Accumulator);
4040 }
4041 break;
4042
4043 case MachineCombinerPattern::FMLSv2f64_OP2:
4044 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4045 RC = &AArch64::FPR128RegClass;
4046 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
4047 Opc = AArch64::FMLSv2i64_indexed;
4048 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4049 FMAInstKind::Indexed);
4050 } else {
4051 Opc = AArch64::FMLSv2f64;
4052 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4053 FMAInstKind::Accumulator);
4054 }
4055 break;
4056
4057 case MachineCombinerPattern::FMLSv4f32_OP2:
4058 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4059 RC = &AArch64::FPR128RegClass;
4060 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
4061 Opc = AArch64::FMLSv4i32_indexed;
4062 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4063 FMAInstKind::Indexed);
4064 } else {
4065 Opc = AArch64::FMLSv4f32;
4066 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4067 FMAInstKind::Accumulator);
4068 }
4069 break;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004071 } // end switch (Pattern)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004072 // Record MUL and ADD/SUB for deletion
4073 DelInstrs.push_back(MUL);
4074 DelInstrs.push_back(&Root);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004075}
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004076
/// \brief Replace a csinc-branch sequence by a simple conditional branch
4078///
4079/// Examples:
4080/// 1.
4081/// csinc w9, wzr, wzr, <condition code>
4082/// tbnz w9, #0, 0x44
4083/// to
4084/// b.<inverted condition code>
4085///
4086/// 2.
4087/// csinc w9, wzr, wzr, <condition code>
4088/// tbz w9, #0, 0x44
4089/// to
4090/// b.<condition code>
4091///
/// Also replace an AND-and-compare-and-branch sequence with a single TBZ/TBNZ
/// instruction when the AND's constant operand is a power of 2.
Balaram Makame9b27252016-03-10 17:54:55 +00004094///
4095/// Examples:
4096/// and w8, w8, #0x400
4097/// cbnz w8, L1
4098/// to
4099/// tbnz w8, #10, L1
4100///
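/// (In the example, 0x400 == 1 << 10, so branching on bit 10 of w8 is
/// equivalent to the original AND followed by CBNZ.)
///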
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004101/// \param MI Conditional Branch
4102/// \return True when the simple conditional branch is generated
4103///
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004104bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004105 bool IsNegativeBranch = false;
4106 bool IsTestAndBranch = false;
4107 unsigned TargetBBInMI = 0;
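  // Operand index of the branch-target MBB: CB(N)Z is (reg, target), while
  // TB(N)Z is (reg, bit, target).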
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004108 switch (MI.getOpcode()) {
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004109 default:
4110 llvm_unreachable("Unknown branch instruction?");
4111 case AArch64::Bcc:
4112 return false;
4113 case AArch64::CBZW:
4114 case AArch64::CBZX:
4115 TargetBBInMI = 1;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004116 break;
4117 case AArch64::CBNZW:
4118 case AArch64::CBNZX:
4119 TargetBBInMI = 1;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004120 IsNegativeBranch = true;
4121 break;
4122 case AArch64::TBZW:
4123 case AArch64::TBZX:
4124 TargetBBInMI = 2;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004125 IsTestAndBranch = true;
4126 break;
4127 case AArch64::TBNZW:
4128 case AArch64::TBNZX:
4129 TargetBBInMI = 2;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004130 IsNegativeBranch = true;
4131 IsTestAndBranch = true;
4132 break;
4133 }
4134 // So we increment a zero register and test for bits other
4135 // than bit 0? Conservatively bail out in case the verifier
4136 // missed this case.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004137 if (IsTestAndBranch && MI.getOperand(1).getImm())
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004138 return false;
4139
4140 // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction!");
4142 MachineBasicBlock *MBB = MI.getParent();
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004143 MachineFunction *MF = MBB->getParent();
4144 MachineRegisterInfo *MRI = &MF->getRegInfo();
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004145 unsigned VReg = MI.getOperand(0).getReg();
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004146 if (!TargetRegisterInfo::isVirtualRegister(VReg))
4147 return false;
4148
4149 MachineInstr *DefMI = MRI->getVRegDef(VReg);
4150
Balaram Makame9b27252016-03-10 17:54:55 +00004151 // Look through COPY instructions to find definition.
4152 while (DefMI->isCopy()) {
4153 unsigned CopyVReg = DefMI->getOperand(1).getReg();
4154 if (!MRI->hasOneNonDBGUse(CopyVReg))
4155 return false;
4156 if (!MRI->hasOneDef(CopyVReg))
4157 return false;
4158 DefMI = MRI->getVRegDef(CopyVReg);
4159 }
4160
4161 switch (DefMI->getOpcode()) {
4162 default:
4163 return false;
  // Fold AND into a TBZ/TBNZ if the constant operand is a power of 2.
4165 case AArch64::ANDWri:
4166 case AArch64::ANDXri: {
4167 if (IsTestAndBranch)
4168 return false;
4169 if (DefMI->getParent() != MBB)
4170 return false;
4171 if (!MRI->hasOneNonDBGUse(VReg))
4172 return false;
4173
Quentin Colombetabe2d012016-04-25 20:54:08 +00004174 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
Balaram Makame9b27252016-03-10 17:54:55 +00004175 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
Quentin Colombetabe2d012016-04-25 20:54:08 +00004176 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
Balaram Makame9b27252016-03-10 17:54:55 +00004177 if (!isPowerOf2_64(Mask))
4178 return false;
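    // A power-of-2 mask tests a single bit N = Log2_64(Mask), so
    // "AND x, (1 << N)" + CBNZ is equivalent to "TBNZ x, #N" (and CBZ to
    // TBZ).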
4179
4180 MachineOperand &MO = DefMI->getOperand(1);
4181 unsigned NewReg = MO.getReg();
4182 if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4183 return false;
4184
4185 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4186
4187 MachineBasicBlock &RefToMBB = *MBB;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004188 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4189 DebugLoc DL = MI.getDebugLoc();
Balaram Makame9b27252016-03-10 17:54:55 +00004190 unsigned Imm = Log2_64(Mask);
Renato Golin179d1f52016-04-23 19:30:52 +00004191 unsigned Opc = (Imm < 32)
4192 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4193 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
Quentin Colombetabe2d012016-04-25 20:54:08 +00004194 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4195 .addReg(NewReg)
4196 .addImm(Imm)
4197 .addMBB(TBB);
    // The register now lives on to the new TBZ/TBNZ, so clear any kill flag.
4199 MO.setIsKill(false);
Quentin Colombetabe2d012016-04-25 20:54:08 +00004200
    // Bit positions smaller than 32 must use the 32-bit (W) variant of
    // TB(N)Z in all cases: the 64-bit (X) variant can only encode bit
    // numbers 32-63.
    // Therefore, if the input register is 64-bit, take its 32-bit sub-part.
4206 if (!Is32Bit && Imm < 32)
4207 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
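    // (This is safe: the bit being tested is below 32, so it is fully
    // contained in the 32-bit sub-register.)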
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004208 MI.eraseFromParent();
Balaram Makame9b27252016-03-10 17:54:55 +00004209 return true;
4210 }
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004211 // Look for CSINC
Balaram Makame9b27252016-03-10 17:54:55 +00004212 case AArch64::CSINCWr:
4213 case AArch64::CSINCXr: {
4214 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4215 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4216 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4217 DefMI->getOperand(2).getReg() == AArch64::XZR))
4218 return false;
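    // At this point DefMI is "csinc Rd, zr, zr, cc", which materializes
    // Rd = cc ? 0 : 1. A branch on Rd != 0 (CBNZ, or TBNZ on bit 0) thus
    // fires exactly when cc is false, which is why IsNegativeBranch inverts
    // the condition code below.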
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004219
Balaram Makame9b27252016-03-10 17:54:55 +00004220 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4221 return false;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004222
Balaram Makame9b27252016-03-10 17:54:55 +00004223 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
Balaram Makame9b27252016-03-10 17:54:55 +00004224 // Convert only when the condition code is not modified between
4225 // the CSINC and the branch. The CC may be used by other
4226 // instructions in between.
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00004227 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
Balaram Makame9b27252016-03-10 17:54:55 +00004228 return false;
4229 MachineBasicBlock &RefToMBB = *MBB;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004230 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4231 DebugLoc DL = MI.getDebugLoc();
Balaram Makame9b27252016-03-10 17:54:55 +00004232 if (IsNegativeBranch)
4233 CC = AArch64CC::getInvertedCondCode(CC);
4234 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004235 MI.eraseFromParent();
Balaram Makame9b27252016-03-10 17:54:55 +00004236 return true;
4237 }
4238 }
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004239}
Alex Lorenzf3630112015-08-18 22:52:15 +00004240
4241std::pair<unsigned, unsigned>
4242AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4243 const unsigned Mask = AArch64II::MO_FRAGMENT;
4244 return std::make_pair(TF & Mask, TF & ~Mask);
4245}
4246
4247ArrayRef<std::pair<unsigned, const char *>>
4248AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4249 using namespace AArch64II;
Eugene Zelenko049b0172017-01-06 00:30:53 +00004250
Hal Finkel982e8d42015-08-30 08:07:29 +00004251 static const std::pair<unsigned, const char *> TargetFlags[] = {
Alex Lorenzf3630112015-08-18 22:52:15 +00004252 {MO_PAGE, "aarch64-page"},
4253 {MO_PAGEOFF, "aarch64-pageoff"},
4254 {MO_G3, "aarch64-g3"},
4255 {MO_G2, "aarch64-g2"},
4256 {MO_G1, "aarch64-g1"},
4257 {MO_G0, "aarch64-g0"},
4258 {MO_HI12, "aarch64-hi12"}};
4259 return makeArrayRef(TargetFlags);
4260}
4261
4262ArrayRef<std::pair<unsigned, const char *>>
4263AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4264 using namespace AArch64II;
Eugene Zelenko049b0172017-01-06 00:30:53 +00004265
Hal Finkel982e8d42015-08-30 08:07:29 +00004266 static const std::pair<unsigned, const char *> TargetFlags[] = {
Alex Lorenzf3630112015-08-18 22:52:15 +00004267 {MO_GOT, "aarch64-got"},
4268 {MO_NC, "aarch64-nc"},
Rafael Espindola4d290992016-05-31 18:31:14 +00004269 {MO_TLS, "aarch64-tls"}};
Alex Lorenzf3630112015-08-18 22:52:15 +00004270 return makeArrayRef(TargetFlags);
4271}
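
// Note on the two tables above: a direct flag is typically combined with
// bitmask flags on an operand; for instance, a classic ADRP/ADD global
// address lowering tags its operands MO_PAGE and MO_PAGEOFF | MO_NC.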
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004272
4273unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize,
4274 size_t Occurrences,
4275 bool CanBeTailCall) const {
4276 unsigned NotOutlinedSize = SequenceSize * Occurrences;
4277 unsigned OutlinedSize;
4278
4279 // Is this candidate something we can outline as a tail call?
4280 if (CanBeTailCall) {
4281 // If yes, then we just outline the sequence and replace each of its
4282 // occurrences with a branch instruction.
4283 OutlinedSize = SequenceSize + Occurrences;
4284 } else {
4285 // If no, then we outline the sequence (SequenceSize), add a return (+1),
4286 // and replace each occurrence with a save/restore to LR and a call
4287 // (3 * Occurrences)
4288 OutlinedSize = (SequenceSize + 1) + (3 * Occurrences);
4289 }
4290
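  // For instance (illustrative numbers): a 10-instruction sequence occurring
  // 3 times that cannot be tail-called costs 30 instructions inline, versus
  // (10 + 1) + 3 * 3 = 20 outlined, for a benefit of 10 instructions.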
4291 // Return the number of instructions saved by outlining this sequence.
4292 return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
4293}
4294
4295bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
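  // Calls to an outlined function save LR at [sp, #-16]! (see
  // insertOutlinedCall below), which would clobber any red zone below sp,
  // so only outline from functions that promise not to use one.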
Jessica Paquetteeac86332017-03-24 23:00:21 +00004296 return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004297}
4298
4299AArch64GenInstrInfo::MachineOutlinerInstrType
4300AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
4301
4302 MachineFunction *MF = MI.getParent()->getParent();
4303 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
4304
4305 // Don't outline LOHs.
4306 if (FuncInfo->getLOHRelated().count(&MI))
4307 return MachineOutlinerInstrType::Illegal;
4308
4309 // Don't allow debug values to impact outlining type.
4310 if (MI.isDebugValue() || MI.isIndirectDebugValue())
4311 return MachineOutlinerInstrType::Invisible;
4312
4313 // Is this a terminator for a basic block?
4314 if (MI.isTerminator()) {
4315
4316 // Is this the end of a function?
4317 if (MI.getParent()->succ_empty())
4318 return MachineOutlinerInstrType::Legal;
4319
4320 // It's not, so don't outline it.
4321 return MachineOutlinerInstrType::Illegal;
4322 }
4323
  // Don't outline position instructions (labels, CFI directives, etc.).
4325 if (MI.isPosition())
4326 return MachineOutlinerInstrType::Illegal;
4327
4328 // Make sure none of the operands are un-outlinable.
4329 for (const MachineOperand &MOP : MI.operands())
4330 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
4331 MOP.isTargetIndex())
4332 return MachineOutlinerInstrType::Illegal;
4333
4334 // Don't outline anything that uses the link register.
4335 if (MI.modifiesRegister(AArch64::LR, &RI) ||
4336 MI.readsRegister(AArch64::LR, &RI))
4337 return MachineOutlinerInstrType::Illegal;
4338
4339 // Does this use the stack?
4340 if (MI.modifiesRegister(AArch64::SP, &RI) ||
4341 MI.readsRegister(AArch64::SP, &RI)) {
4342
4343 // Is it a memory operation?
4344 if (MI.mayLoadOrStore()) {
      unsigned Base; // Filled with the base register of MI.
4346 int64_t Offset; // Filled with the offset of MI.
4347 unsigned DummyWidth;
4348
4349 // Does it allow us to offset the base register and is the base SP?
4350 if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
4351 Base != AArch64::SP)
4352 return MachineOutlinerInstrType::Illegal;
4353
4354 // Find the minimum/maximum offset for this instruction and check if
4355 // fixing it up would be in range.
4356 int64_t MinOffset, MaxOffset;
4357 unsigned DummyScale;
4358 getMemOpInfo(MI.getOpcode(), DummyScale, DummyWidth, MinOffset,
4359 MaxOffset);
4360
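      // The outlined body will run with LR pushed, so every SP-relative
      // access moves 16 bytes further from the new SP; fixupPostOutline
      // rewrites the offsets accordingly.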
4361 // TODO: We should really test what happens if an instruction overflows.
4362 // This is tricky to test with IR tests, but when the outliner is moved
4363 // to a MIR test, it really ought to be checked.
Jessica Paquette5d59a4e2017-03-20 15:51:45 +00004364 if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset)
Jessica Paquette02cbfb22017-03-20 16:25:04 +00004365 return MachineOutlinerInstrType::Illegal;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004366
4367 // It's in range, so we can outline it.
4368 return MachineOutlinerInstrType::Legal;
4369 }
4370
4371 // We can't fix it up, so don't outline it.
4372 return MachineOutlinerInstrType::Illegal;
4373 }
4374
4375 return MachineOutlinerInstrType::Legal;
4376}
4377
4378void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
4379 for (MachineInstr &MI : MBB) {
4380 unsigned Base, Width;
4381 int64_t Offset;
4382
4383 // Is this a load or store with an immediate offset with SP as the base?
4384 if (!MI.mayLoadOrStore() ||
4385 !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
4386 Base != AArch64::SP)
4387 continue;
4388
4389 // It is, so we have to fix it up.
4390 unsigned Scale;
4391 int64_t Dummy1, Dummy2;
4392
4393 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
4394 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
4395 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
4396 assert(Scale != 0 && "Unexpected opcode!");
4397
4398 // We've pushed the return address to the stack, so add 16 to the offset.
4399 // This is safe, since we already checked if it would overflow when we
4400 // checked if this instruction was legal to outline.
    int64_t NewImm = (Offset + 16) / Scale;
4402 StackOffsetOperand.setImm(NewImm);
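    // For example, "ldr x0, [sp, #32]" (Scale == 8) would become
    // "ldr x0, [sp, #48]", stored as the scaled immediate 48 / 8 == 6.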
4403 }
4404}
4405
4406void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
4407 MachineFunction &MF,
4408 bool IsTailCall) const {
4409
4410 // If this is a tail call outlined function, then there's already a return.
4411 if (IsTailCall)
4412 return;
4413
4414 // It's not a tail call, so we have to insert the return ourselves.
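  // LR is marked Undef because nothing in the outlined body defines it; its
  // value at the RET is the return address set by the BL at the call site.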
  MachineInstr *Ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
                          .addReg(AArch64::LR, RegState::Undef);
  MBB.insert(MBB.end(), Ret);
4418
4419 // Walk over the basic block and fix up all the stack accesses.
4420 fixupPostOutline(MBB);
4421}
4422
4423void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
4424 MachineFunction &MF,
4425 bool IsTailCall) const {}
4426
4427MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
4428 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
4429 MachineFunction &MF, bool IsTailCall) const {
4430
4431 // Are we tail calling?
4432 if (IsTailCall) {
4433 // If yes, then we can just branch to the label.
4434 It = MBB.insert(It,
4435 BuildMI(MF, DebugLoc(), get(AArch64::B))
4436 .addGlobalAddress(M.getNamedValue(MF.getName())));
4437 return It;
4438 }
4439
4440 // We're not tail calling, so we have to save LR before the call and restore
4441 // it after.
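  // The save is "str x30, [sp, #-16]!", keeping sp 16-byte aligned.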
4442 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
4443 .addReg(AArch64::SP, RegState::Define)
4444 .addReg(AArch64::LR)
4445 .addReg(AArch64::SP)
4446 .addImm(-16);
4447 It = MBB.insert(It, STRXpre);
4448 It++;
4449
4450 // Insert the call.
4451 It = MBB.insert(It,
4452 BuildMI(MF, DebugLoc(), get(AArch64::BL))
4453 .addGlobalAddress(M.getNamedValue(MF.getName())));
4454
4455 It++;
4456
4457 // Restore the link register.
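  // That is "ldr x30, [sp], #16", undoing the push above.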
4458 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
4459 .addReg(AArch64::SP, RegState::Define)
4460 .addReg(AArch64::LR)
4461 .addReg(AArch64::SP)
4462 .addImm(16);
4463 It = MBB.insert(It, LDRXpost);
4464
4465 return It;
4466}
4467