//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// getInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  // before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    NumBytes = 16;
    break;
  }

  return NumBytes;
}

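// Cond encoding used by parseCondBranch() and its consumers below (a
// convention local to this target): a single-element Cond holds a Bcc
// condition code; otherwise Cond[0] is the sentinel -1, Cond[1] the
// compare-and-branch opcode, Cond[2] the source register operand, and
// Cond[3] (TB[N]Z only) the bit number being tested.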
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
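// analyzeBranch follows the usual TargetInstrInfo contract: returning false
// means the terminators were understood and TBB/FBB/Cond have been filled in;
// returning true means the block could not be analyzed.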
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now that the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

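// Since every AArch64 branch is a single 4-byte instruction, the
// BytesRemoved/BytesAdded counts reported by removeBranch and insertBranch
// below are simply 4 times the number of branches touched.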
unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use add() instead of addReg() to keep the operand flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}
377
Jessica Paquette809d7082017-07-28 03:21:58 +0000378unsigned AArch64InstrInfo::insertBranch(
379 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
380 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
Tim Northover3b0846e2014-05-24 12:50:23 +0000381 // Shouldn't be a fall through.
Matt Arsenaulte8e0f5c2016-09-14 17:24:15 +0000382 assert(TBB && "insertBranch must not be told to insert a fallthrough");
Tim Northover3b0846e2014-05-24 12:50:23 +0000383
384 if (!FBB) {
385 if (Cond.empty()) // Unconditional branch?
386 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
387 else
388 instantiateCondBranch(MBB, DL, TBB, Cond);
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000389
390 if (BytesAdded)
391 *BytesAdded = 4;
392
Tim Northover3b0846e2014-05-24 12:50:23 +0000393 return 1;
394 }
395
396 // Two-way conditional branch.
397 instantiateCondBranch(MBB, DL, TBB, Cond);
398 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000399
400 if (BytesAdded)
401 *BytesAdded = 8;
402
Tim Northover3b0846e2014-05-24 12:50:23 +0000403 return 2;
404}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
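// For example (per the cases below), "add x, #1" can fold into csinc,
// "orn dst, xzr, x" (bitwise not) into csinv, and "sub dst, xzr, x" (negate)
// into csneg.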
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // Fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // Fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       unsigned TrueReg, unsigned FalseReg,
                                       int &CondCycles, int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}
527
528void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
Benjamin Kramerbdc49562016-06-12 15:39:02 +0000529 MachineBasicBlock::iterator I,
530 const DebugLoc &DL, unsigned DstReg,
Ahmed Bougachac88bf542015-06-11 19:30:37 +0000531 ArrayRef<MachineOperand> Cond,
Tim Northover3b0846e2014-05-24 12:50:23 +0000532 unsigned TrueReg, unsigned FalseReg) const {
533 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
534
535 // Parse the condition code, see parseCondBranch() above.
536 AArch64CC::CondCode CC;
537 switch (Cond.size()) {
538 default:
539 llvm_unreachable("Unknown condition opcode in Cond");
540 case 1: // b.cc
541 CC = AArch64CC::CondCode(Cond[0].getImm());
542 break;
543 case 3: { // cbz/cbnz
544 // We must insert a compare against 0.
545 bool Is64Bit;
546 switch (Cond[1].getImm()) {
547 default:
548 llvm_unreachable("Unknown branch opcode in Cond");
549 case AArch64::CBZW:
Eugene Zelenko049b0172017-01-06 00:30:53 +0000550 Is64Bit = false;
Tim Northover3b0846e2014-05-24 12:50:23 +0000551 CC = AArch64CC::EQ;
552 break;
553 case AArch64::CBZX:
Eugene Zelenko049b0172017-01-06 00:30:53 +0000554 Is64Bit = true;
Tim Northover3b0846e2014-05-24 12:50:23 +0000555 CC = AArch64CC::EQ;
556 break;
557 case AArch64::CBNZW:
Eugene Zelenko049b0172017-01-06 00:30:53 +0000558 Is64Bit = false;
Tim Northover3b0846e2014-05-24 12:50:23 +0000559 CC = AArch64CC::NE;
560 break;
561 case AArch64::CBNZX:
Eugene Zelenko049b0172017-01-06 00:30:53 +0000562 Is64Bit = true;
Tim Northover3b0846e2014-05-24 12:50:23 +0000563 CC = AArch64CC::NE;
564 break;
565 }
566 unsigned SrcReg = Cond[2].getReg();
567 if (Is64Bit) {
568 // cmp reg, #0 is actually subs xzr, reg, #0.
569 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
570 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
571 .addReg(SrcReg)
572 .addImm(0)
573 .addImm(0);
574 } else {
575 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
576 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
577 .addReg(SrcReg)
578 .addImm(0)
579 .addImm(0);
580 }
581 break;
582 }
583 case 4: { // tbz/tbnz
584 // We must insert a tst instruction.
585 switch (Cond[1].getImm()) {
586 default:
587 llvm_unreachable("Unknown branch opcode in Cond");
588 case AArch64::TBZW:
589 case AArch64::TBZX:
590 CC = AArch64CC::EQ;
591 break;
592 case AArch64::TBNZW:
593 case AArch64::TBNZX:
594 CC = AArch64CC::NE;
595 break;
596 }
597 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
598 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
599 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
600 .addReg(Cond[2].getReg())
601 .addImm(
602 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
603 else
604 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
605 .addReg(Cond[2].getReg())
606 .addImm(
607 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
608 break;
609 }
610 }
611
612 unsigned Opc = 0;
613 const TargetRegisterClass *RC = nullptr;
614 bool TryFold = false;
615 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
616 RC = &AArch64::GPR64RegClass;
617 Opc = AArch64::CSELXr;
618 TryFold = true;
619 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
620 RC = &AArch64::GPR32RegClass;
621 Opc = AArch64::CSELWr;
622 TryFold = true;
623 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
624 RC = &AArch64::FPR64RegClass;
625 Opc = AArch64::FCSELDrrr;
626 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
627 RC = &AArch64::FPR32RegClass;
628 Opc = AArch64::FCSELSrrr;
629 }
630 assert(RC && "Unsupported regclass");
631
632 // Try folding simple instructions into the csel.
633 if (TryFold) {
634 unsigned NewVReg = 0;
635 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
636 if (FoldedOpc) {
637 // The folded opcodes csinc, csinc and csneg apply the operation to
638 // FalseReg, so we need to invert the condition.
639 CC = AArch64CC::getInvertedCondCode(CC);
640 TrueReg = FalseReg;
641 } else
642 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
643
644 // Fold the operation. Leave any dead instructions for DCE to clean up.
645 if (FoldedOpc) {
646 FalseReg = NewVReg;
647 Opc = FoldedOpc;
648 // The extends the live range of NewVReg.
649 MRI.clearKillFlags(NewVReg);
650 }
651 }
652
653 // Pull all virtual register into the appropriate class.
654 MRI.constrainRegClass(TrueReg, RC);
655 MRI.constrainRegClass(FalseReg, RC);
656
657 // Insert the csel.
Jessica Paquette809d7082017-07-28 03:21:58 +0000658 BuildMI(MBB, I, DL, get(Opc), DstReg)
659 .addReg(TrueReg)
660 .addReg(FalseReg)
661 .addImm(CC);
Tim Northover3b0846e2014-05-24 12:50:23 +0000662}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
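/// The shift pair below zero-extends the low BitSize bits of the immediate
/// before asking whether that value has a valid logical-immediate encoding.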
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in the future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();
  if (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
      isExynosShiftLeftFast(MI))
    return true;

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub with immediate, provided the immediate is not shifted
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has the
  // ZeroCycleZeroing feature.
  case AArch64::FMOVH0:
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

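// Returns true when the shift or extend amount of MI is one Exynos M1
// executes at full speed, as modeled by the cases below: immediate forms,
// LSL shifts of #0..#3 on shifted-register ALU ops, UXTX extends of #0..#3
// on extended-register ALU ops, and X-register (SXTX/UXTX) offsets on loads
// and stores.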
bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
  unsigned Imm, Shift;
  AArch64_AM::ShiftExtendType Ext;

  switch (MI.getOpcode()) {
  default:
    return false;

  // WriteI
  case AArch64::ADDSWri:
  case AArch64::ADDSXri:
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return true;

  // WriteISReg
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getShiftValue(Imm);
    Ext = AArch64_AM::getShiftType(Imm);
    return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));

  // WriteIEReg
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getArithShiftValue(Imm);
    Ext = AArch64_AM::getArithExtendType(Imm);
    return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));

  case AArch64::PRFMroW:
  case AArch64::PRFMroX:

  // WriteLDIdx
  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:

  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:

  // WriteSTIdx
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX:

  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
    Imm = MI.getOperand(3).getImm();
    Ext = AArch64_AM::getMemExtendType(Imm);
    return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
  }
}

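// Returns true when Falkor treats MI's shift or extend amount as free, as
// modeled by the cases below.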
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
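  // For example, an 8-byte store at [x0, #0] and a 4-byte load at [x0, #8]
  // are trivially disjoint, since 0 + 8 <= 8.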
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: CmpValue is only ever compared against zero, so it is collapsed
    // to 0 or 1 here.
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return type of decodeLogicalImmediate is uint64_t, while the
    // type of CmpValue is int; converting uint64_t to int drops the high 32
    // bits (this caused a bug in spec2006-483.xalancbmk). CmpValue is only
    // used to compare with zero in optimizeCompareInstr, so it is likewise
    // collapsed to 0 or 1 here.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}

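/// Constrain every register operand of \p Instr to the register class required
/// by its instruction descriptor. Returns false if a physical register operand
/// lies outside its required class or a virtual register cannot be
/// constrained; returns true otherwise.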
static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegConstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegConstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegConstraints->contains(Reg))
        return false;
    } else if (!OpRegConstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegConstraints))
      return false;
  }

  return true;
}

/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
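// AK_All is the union of the write and read bits, so masking AccessToCheck
// with AK_Write/AK_Read in areCFlagsAccessedBetweenInstrs() selects which
// kind of access to look for.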

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed the condition
/// flags are accessed on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting at \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is a true compare instruction
/// when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare().
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get the opcode of the S (flag-setting) version of Instr.
/// If Instr is already an S version, its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

1353/// Find a condition code used by the instruction.
1354/// Returns AArch64CC::Invalid if either the instruction does not use condition
1355/// codes or we don't optimize CmpInstr in the presence of such instructions.
1356static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1357 switch (Instr.getOpcode()) {
Jessica Paquette809d7082017-07-28 03:21:58 +00001358 default:
1359 return AArch64CC::Invalid;
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001360
Jessica Paquette809d7082017-07-28 03:21:58 +00001361 case AArch64::Bcc: {
1362 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1363 assert(Idx >= 2);
1364 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1365 }
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001366
Jessica Paquette809d7082017-07-28 03:21:58 +00001367 case AArch64::CSINVWr:
1368 case AArch64::CSINVXr:
1369 case AArch64::CSINCWr:
1370 case AArch64::CSINCXr:
1371 case AArch64::CSELWr:
1372 case AArch64::CSELXr:
1373 case AArch64::CSNEGWr:
1374 case AArch64::CSNEGXr:
1375 case AArch64::FCSELSrrr:
1376 case AArch64::FCSELDrrr: {
1377 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1378 assert(Idx >= 1);
1379 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1380 }
Evgeny Astigeevichfd89fe02016-04-21 08:54:08 +00001381 }
1382}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}
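
// Worked example (illustrative): for a consumer such as "b.gt <target>",
// findCondCodeUsedByInstr returns AArch64CC::GT, and getUsedNZCV(GT) reports
// N, Z and V as used but C as unused, so a preceding flag-setting instruction
// only needs to produce correct N/Z/V for this consumer.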

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted when:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are in the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI's opcode is the S form, there are no defs of flags between
///        MI and CmpInstr,
///        or, if MI's opcode is not the S form, there are neither defs nor
///        uses of flags between MI and CmpInstr
/// - and, the C and V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces the needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}
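
// Example (illustrative sketch): for a direct reference to the guard
// variable on the default code model, the pseudo above would expand to
// roughly
//   adrp xN, GV                       ; page address of the guard
//   ldr  xN, [xN, :lo12:GV]           ; load the guard value
// while the Large code model path materializes the full 64-bit address with
// a movz/movk sequence before the load. The exact relocations depend on
// ClassifyGlobalReference.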

/// Return true if this instruction has a shifted register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}
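
// Example (illustrative): "add w0, w1, w2, lsl #3" (ADDWrs with a non-zero
// shifter immediate) returns true here, while "add w0, w1, w2" (shifter
// immediate 0) returns false.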

/// Return true if this instruction has an extended register operand with a
/// non-zero extend/shift immediate.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}
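
// Example (illustrative): "add x0, x1, w2, sxtw #2" (ADDXrx) returns true
// because its arithmetic-extend immediate is non-zero; plain register-register
// adds use the shifted-register forms and are not matched by this function.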

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    unsigned Val = MI.getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  return false;
}
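
// Examples (illustrative): "ldr x0, [x1, x2, lsl #3]" returns true (the index
// register is shifted), and "ldr w0, [x1, w2, sxtw]" returns true (the index
// is sign-extended), but "ldr x0, [x1, x2]" returns false since the index is
// used unmodified (UXTX, no shift).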

/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}

/// Check all MachineMemOperands for a hint that the load/store is strided.
bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) const {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOStridedAccess;
  });
}

bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}

bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
  return isUnscaledLdSt(MI.getOpcode());
}

// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm (as opposed to an address reloc).
  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
  if (!MI.getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  unsigned BaseReg = MI.getOperand(1).getReg();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI.modifiesRegister(BaseReg, TRI))
    return false;

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.isPaired128Slow()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}
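
// Examples (illustrative): "ldr x0, [x0]" is rejected above because the
// loaded result overwrites the base register, and a volatile load is rejected
// by the hasOrderedMemoryRef() check before any pairing is attempted.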

bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    const TargetRegisterInfo *TRI) const {
  unsigned Width;
  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Get the scaling factor for the instruction and set the width of the
  // memory access.
  unsigned Scale = 0;
  int64_t Dummy1, Dummy2;

  // If this returns false, then it's an instruction we don't want to handle.
  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    return false;

  // Compute the offset. The offset is the immediate operand multiplied by the
  // scaling factor; unscaled instructions have a scaling factor of 1.
  if (LdSt.getNumExplicitOperands() == 3) {
    BaseReg = LdSt.getOperand(1).getReg();
    Offset = LdSt.getOperand(2).getImm() * Scale;
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseReg = LdSt.getOperand(2).getReg();
    Offset = LdSt.getOperand(3).getImm() * Scale;
  }
  return true;
}
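
// Worked example (illustrative): for "ldr x1, [x0, #16]" (LDRXui) the
// immediate operand is 2 and getMemOpInfo reports Scale = 8, so this returns
// BaseReg = x0, Offset = 2 * 8 = 16, and Width = 8.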

MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
  return OfsOp;
}

bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
                                    unsigned &Width, int64_t &MinOffset,
                                    int64_t &MaxOffset) const {
  switch (Opcode) {
  // Not a memory operation or something we want to handle.
  default:
    Scale = Width = 0;
    MinOffset = MaxOffset = 0;
    return false;
  case AArch64::STRWpost:
  case AArch64::LDRWpost:
    Width = 32;
    Scale = 4;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    Scale = 16;
    Width = 32;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    Scale = 8;
    Width = 16;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    Scale = Width = 8;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    Scale = 4;
    Width = 8;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = Width = 4;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  }

  return true;
}
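
// Worked example (illustrative): for LDPXi ("ldp x0, x1, [xN, #imm]") this
// reports Scale = 8, Width = 16 and an element-offset range of [-64, 63],
// i.e. a byte-offset range of [-512, 504] in steps of 8.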

// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
  unsigned OffsetStride = 1;
  switch (Opc) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    OffsetStride = 16;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    OffsetStride = 8;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    OffsetStride = 4;
    break;
  }
  // If the byte-offset isn't a multiple of the stride, we can't scale this
  // offset.
  if (Offset % OffsetStride != 0)
    return false;

  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  Offset /= OffsetStride;
  return true;
}
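
// Worked example (illustrative): an LDURXi with byte offset 16 scales to
// element offset 2 (16 / 8), while byte offset 12 fails because it is not a
// multiple of the 8-byte stride.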

static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
  if (FirstOpc == SecondOpc)
    return true;
  // We can also pair sign-ext and zero-ext instructions.
  switch (FirstOpc) {
  default:
    return false;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
  }
  // These instructions can't be paired based on their opcodes.
  return false;
}

/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
                                           unsigned BaseReg1,
                                           MachineInstr &SecondLdSt,
                                           unsigned BaseReg2,
                                           unsigned NumLoads) const {
  if (BaseReg1 != BaseReg2)
    return false;

  // Only cluster up to a single pair.
  if (NumLoads > 1)
    return false;

  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
    return false;

  // Can we pair these instructions based on their opcodes?
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  if (!canPairLdStOpc(FirstOpc, SecondOpc))
    return false;

  // Can't merge volatiles or load/stores that have a hint to avoid pair
  // formation, for example.
  if (!isCandidateToMergeOrPair(FirstLdSt) ||
      !isCandidateToMergeOrPair(SecondLdSt))
    return false;

  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    return false;

  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    return false;

  // Pairwise instructions have a 7-bit signed offset field.
  if (Offset1 > 63 || Offset1 < -64)
    return false;

  // The caller should already have ordered First/SecondLdSt by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  return Offset1 + 1 == Offset2;
}
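
// Example (illustrative): "ldr x1, [x0, #8]" and "ldr x2, [x0, #16]" have
// element offsets 1 and 2 off the same base, so the scheduler may cluster
// them, allowing the load/store optimizer to later form
// "ldp x1, x2, [x0, #8]".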

static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here, that happens to be
  // easily obtainable with a mask.
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}
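
// Worked example (illustrative): copying the triple D1_D2_D3 into D2_D3_D4
// gives (2 - 1) & 0x1f == 1, which is < 3, so a forward sub-register copy
// would overwrite a not-yet-read source register; copyPhysRegTuple below then
// emits the per-register copies in reverse order instead.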

void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I,
                                        const DebugLoc &DL, unsigned DestReg,
                                        unsigned SrcReg, bool KillSrc,
                                        unsigned Opcode,
                                        ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}

void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, unsigned DestReg,
                                   unsigned SrcReg, bool KillSrc) const {
  if (AArch64::GPR32spRegClass.contains(DestReg) &&
      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    const TargetRegisterInfo *TRI = &getRegisterInfo();

    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
      // If either operand is WSP, expand to ADD #0.
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
            .addReg(SrcRegX, RegState::Undef)
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc))
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
      }
    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
            .addReg(AArch64::XZR)
            .addReg(SrcRegX, RegState::Undef)
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        // Otherwise, expand to ORR WZR.
        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
            .addReg(AArch64::WZR)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }
    }
    return;
  }

  if (AArch64::GPR64spRegClass.contains(DestReg) &&
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
      // If either operand is SP, expand to ADD #0.
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      // Otherwise, expand to ORR XZR.
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
          .addReg(AArch64::XZR)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copy a DDDD register quad by copying the individual sub-registers.
  if (AArch64::DDDDRegClass.contains(DestReg) &&
      AArch64::DDDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                       AArch64::dsub2, AArch64::dsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DDD register triple by copying the individual sub-registers.
  if (AArch64::DDDRegClass.contains(DestReg) &&
      AArch64::DDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
                                       AArch64::dsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DD register pair by copying the individual sub-registers.
  if (AArch64::DDRegClass.contains(DestReg) &&
      AArch64::DDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a QQQQ register quad by copying the individual sub-registers.
  if (AArch64::QQQQRegClass.contains(DestReg) &&
      AArch64::QQQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                       AArch64::qsub2, AArch64::qsub3};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQQ register triple by copying the individual sub-registers.
  if (AArch64::QQQRegClass.contains(DestReg) &&
      AArch64::QQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
                                       AArch64::qsub2};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }

  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copies between GPR64 and FPR64.
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::GPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  // Copies between GPR32 and FPR32.
  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::GPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

  llvm_unreachable("unimplemented reg-to-reg copy");
}
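
// Example (illustrative): a COPY of q0 into q1 on a NEON-capable subtarget is
// emitted as "orr v1.16b, v0.16b, v0.16b", while an FPR64 copy without NEON
// falls back to "fmov d1, d0".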

void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}
2639
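// Illustrative only (a sketch; register and frame-index names hypothetical):
// spilling a 128-bit FPR128 value selects STRQui and keeps the immediate
// offset operand,
//
//   STRQui %q0, <fi#0>, 0
//
// while a DD register tuple of the same 16-byte spill size has no immediate
// form, so ST1Twov1d is used and the trailing #0 is omitted (Offset == false).
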
void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

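// Illustrative only (a sketch mirroring the store case): reloading a virtual
// GPR64all value selects LDRXui,
//
//   %0 = LDRXui <fi#0>, 0
//
// and the destination is constrained to GPR64 so that SP can never be the
// target of the reload.
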
2737void llvm::emitFrameOffset(MachineBasicBlock &MBB,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00002738 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
Tim Northover3b0846e2014-05-24 12:50:23 +00002739 unsigned DestReg, unsigned SrcReg, int Offset,
Eric Christopherbc76b972014-06-10 17:33:39 +00002740 const TargetInstrInfo *TII,
Tim Northover3b0846e2014-05-24 12:50:23 +00002741 MachineInstr::MIFlag Flag, bool SetNZCV) {
2742 if (DestReg == SrcReg && Offset == 0)
2743 return;
2744
Geoff Berrya5335642016-05-06 16:34:59 +00002745 assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2746 "SP increment/decrement not 16-byte aligned");
2747
Tim Northover3b0846e2014-05-24 12:50:23 +00002748 bool isSub = Offset < 0;
2749 if (isSub)
2750 Offset = -Offset;
2751
2752 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2753 // scratch register. If DestReg is a virtual register, use it as the
2754 // scratch register; otherwise, create a new virtual register (to be
2755 // replaced by the scavenger at the end of PEI). That case can be optimized
2756 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2757 // register can be loaded with offset%8 and the add/sub can use an extending
2758 // instruction with LSL#3.
2759 // Currently the function handles any offsets but generates a poor sequence
2760 // of code.
2761 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2762
2763 unsigned Opc;
2764 if (SetNZCV)
2765 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2766 else
2767 Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2768 const unsigned MaxEncoding = 0xfff;
2769 const unsigned ShiftSize = 12;
2770 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2771 while (((unsigned)Offset) >= (1 << ShiftSize)) {
2772 unsigned ThisVal;
2773 if (((unsigned)Offset) > MaxEncodableValue) {
2774 ThisVal = MaxEncodableValue;
2775 } else {
2776 ThisVal = Offset & MaxEncodableValue;
2777 }
2778 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2779 "Encoding cannot handle value that big");
2780 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2781 .addReg(SrcReg)
2782 .addImm(ThisVal >> ShiftSize)
2783 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2784 .setMIFlag(Flag);
2785
2786 SrcReg = DestReg;
2787 Offset -= ThisVal;
2788 if (Offset == 0)
2789 return;
2790 }
2791 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2792 .addReg(SrcReg)
2793 .addImm(Offset)
2794 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2795 .setMIFlag(Flag);
2796}
2797
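// Worked example (illustrative only): a call with Offset == 4100 first peels
// off the shifted chunk 4096 in the loop above and then emits the remainder:
//
//   ADD xD, xS, #1, lsl #12    ; xD = xS + 4096
//   ADD xD, xD, #4             ; xD = xD + 4
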
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex,
    LiveIntervals *LIS) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %0<def> = COPY %sp; GPR64all:%0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %0 register class here.
  //
  // <rdar://problem/11522048>
  //
  if (MI.isFullCopy()) {
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned SrcReg = MI.getOperand(1).getReg();
    if (SrcReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Handle the case where a copy is being spilled or filled but the source
  // and destination register classes don't match. For example:
  //
  //   %0<def> = COPY %xzr; GPR64common:%0
  //
  // In this case we can still safely fold away the COPY and generate the
  // following spill code:
  //
  //   STRXui %xzr, <fi#0>
  //
  // This also eliminates spilled cross register class COPYs (e.g. between x and
  // d regs) of the same size. For example:
  //
  //   %0<def> = COPY %1; GPR64:%0, FPR64:%1
  //
  // will be filled as
  //
  //   LDRDui %0, <fi#0>
  //
  // instead of
  //
  //   LDRXui %Temp, <fi#0>
  //   %0 = FMOV %Temp
  //
  if (MI.isCopy() && Ops.size() == 1 &&
      // Make sure we're only folding the explicit COPY defs/uses.
      (Ops[0] == 0 || Ops[0] == 1)) {
    bool IsSpill = Ops[0] == 0;
    bool IsFill = !IsSpill;
    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    MachineBasicBlock &MBB = *MI.getParent();
    const MachineOperand &DstMO = MI.getOperand(0);
    const MachineOperand &SrcMO = MI.getOperand(1);
    unsigned DstReg = DstMO.getReg();
    unsigned SrcReg = SrcMO.getReg();
    // This is slightly expensive to compute for physical regs since
    // getMinimalPhysRegClass is slow.
    auto getRegClass = [&](unsigned Reg) {
      return TargetRegisterInfo::isVirtualRegister(Reg)
                 ? MRI.getRegClass(Reg)
                 : TRI.getMinimalPhysRegClass(Reg);
    };

    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
      assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
             "Mismatched register size in non subreg COPY");
      if (IsSpill)
        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
                            getRegClass(SrcReg), &TRI);
      else
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
                             getRegClass(DstReg), &TRI);
      return &*--InsertPt;
    }

    // Handle cases like spilling def of:
    //
    //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
    //
    // where the physical register source can be widened and stored to the full
    // virtual reg destination stack slot, in this case producing:
    //
    //   STRXui %xzr, <fi#0>
    //
    if (IsSpill && DstMO.isUndef() &&
        TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
      assert(SrcMO.getSubReg() == 0 &&
             "Unexpected subreg on physical register");
      const TargetRegisterClass *SpillRC;
      unsigned SpillSubreg;
      switch (DstMO.getSubReg()) {
      default:
        SpillRC = nullptr;
        break;
      case AArch64::sub_32:
      case AArch64::ssub:
        if (AArch64::GPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::GPR64RegClass;
          SpillSubreg = AArch64::sub_32;
        } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR64RegClass;
          SpillSubreg = AArch64::ssub;
        } else
          SpillRC = nullptr;
        break;
      case AArch64::dsub:
        if (AArch64::FPR64RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR128RegClass;
          SpillSubreg = AArch64::dsub;
        } else
          SpillRC = nullptr;
        break;
      }

      if (SpillRC)
        if (unsigned WidenedSrcReg =
                TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
          storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
                              FrameIndex, SpillRC, &TRI);
          return &*--InsertPt;
        }
    }

    // Handle cases like filling use of:
    //
    //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
    //
    // where we can load the full virtual reg source stack slot into the subreg
    // destination, in this case producing:
    //
    //   LDRWui %0:sub_32<def,read-undef>, <fi#0>
    //
    if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
      const TargetRegisterClass *FillRC;
      switch (DstMO.getSubReg()) {
      default:
        FillRC = nullptr;
        break;
      case AArch64::sub_32:
        FillRC = &AArch64::GPR32RegClass;
        break;
      case AArch64::ssub:
        FillRC = &AArch64::FPR32RegClass;
        break;
      case AArch64::dsub:
        FillRC = &AArch64::FPR64RegClass;
        break;
      }

      if (FillRC) {
        assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
                   TRI.getRegSizeInBits(*FillRC) &&
               "Mismatched regclass size on folded subreg COPY");
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
        MachineInstr &LoadMI = *--InsertPt;
        MachineOperand &LoadDst = LoadMI.getOperand(0);
        assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
        LoadDst.setSubReg(DstMO.getSubReg());
        LoadDst.setIsUndef();
        return &LoadMI;
      }
    }
  }

  // Cannot fold.
  return nullptr;
}

int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int *EmittableOffset) {
  int Scale = 1;
  bool IsSigned = false;
  // The ImmIdx should be changed case by case if it is not 2.
  unsigned ImmIdx = 2;
  unsigned UnscaledOp = 0;
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
  // Vector spills/fills can't take an immediate offset.
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
    return AArch64FrameOffsetCannotUpdate;
  case AArch64::PRFMui:
    Scale = 8;
    UnscaledOp = AArch64::PRFUMi;
    break;
  case AArch64::LDRXui:
    Scale = 8;
    UnscaledOp = AArch64::LDURXi;
    break;
  case AArch64::LDRWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURWi;
    break;
  case AArch64::LDRBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBi;
    break;
  case AArch64::LDRHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHi;
    break;
  case AArch64::LDRSui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSi;
    break;
  case AArch64::LDRDui:
    Scale = 8;
    UnscaledOp = AArch64::LDURDi;
    break;
  case AArch64::LDRQui:
    Scale = 16;
    UnscaledOp = AArch64::LDURQi;
    break;
  case AArch64::LDRBBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBBi;
    break;
  case AArch64::LDRHHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHHi;
    break;
  case AArch64::LDRSBXui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBXi;
    break;
  case AArch64::LDRSBWui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBWi;
    break;
  case AArch64::LDRSHXui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHXi;
    break;
  case AArch64::LDRSHWui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHWi;
    break;
  case AArch64::LDRSWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSWi;
    break;

  case AArch64::STRXui:
    Scale = 8;
    UnscaledOp = AArch64::STURXi;
    break;
  case AArch64::STRWui:
    Scale = 4;
    UnscaledOp = AArch64::STURWi;
    break;
  case AArch64::STRBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBi;
    break;
  case AArch64::STRHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHi;
    break;
  case AArch64::STRSui:
    Scale = 4;
    UnscaledOp = AArch64::STURSi;
    break;
  case AArch64::STRDui:
    Scale = 8;
    UnscaledOp = AArch64::STURDi;
    break;
  case AArch64::STRQui:
    Scale = 16;
    UnscaledOp = AArch64::STURQi;
    break;
  case AArch64::STRBBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBBi;
    break;
  case AArch64::STRHHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHHi;
    break;

  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 8;
    break;
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::LDNPQi:
  case AArch64::STNPQi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 16;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 4;
    break;

  case AArch64::LDURXi:
  case AArch64::LDURWi:
  case AArch64::LDURBi:
  case AArch64::LDURHi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSWi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::STURBi:
  case AArch64::STURHi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
    Scale = 1;
    break;
  }

  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset (and have an unscaled op to use).
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Use an unscaled addressing mode if the instruction has a negative offset
  // (or if the instruction is already using an unscaled addressing mode).
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }

  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}

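// Worked example (illustrative only): for LDRXui (Scale == 8, unsigned 12-bit
// immediate) an incoming Offset of 40 scales to an emittable immediate of 5
// and the remaining Offset becomes 0; an Offset of 33 is not a multiple of 8,
// so the routine switches to the unscaled LDURXi (signed 9-bit, Scale == 1)
// and reports *OutUseUnscaledOp == true with *EmittableOffset == 33.
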
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}

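// Worked example (illustrative only, MIR-style sketch): rewriting
//
//   %0 = LDRXui %fixed-stack.0, 1
//
// against FrameReg == sp with an incoming Offset of 24 folds the whole
// displacement into the instruction, since (24 + 1 * 8) / 8 == 4:
//
//   %0 = LDRXui %sp, 4
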
void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::createImm(0));
}

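// Note: HINT #0 is the architectural NOP encoding, so the MCInst built by
// getNoop() above disassembles as a plain "nop".
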
// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }

// True when Opc sets flag
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    return (Options.UnsafeFPMath ||
            Options.AllowFPOpFusion == FPOpFusion::Fast);
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

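// For example, SUBSWrr is both a flag-setting opcode and a 32-bit combine
// candidate; getMaddPatterns() below only combines it after rewriting it to
// its non-flag-setting form, and only when the NZCV result is dead.
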
//
// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // Must only be used by the user we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  case AArch64::FADDDrr:
  case AArch64::FADDSrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FMULDrr:
  case AArch64::FMULSrr:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv2f64:
  case AArch64::FMULXv4f32:
  case AArch64::FMULv2f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}

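// For example (a sketch): because FADDSrr is reported associative and
// commutative under unsafe-fp-math, the generic reassociation in
// TargetInstrInfo can rebalance a serial chain
//
//   t1 = a + b; t2 = t1 + c; t3 = t2 + d
//
// into
//
//   t1 = a + b; u = c + d; t3 = t1 + u
//
// shortening the critical path from three dependent adds to two.
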
/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // When NZCV is live bail out.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertToNonFlagSettingOpc(Root);
    // When opcode can't change bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}

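// Illustrative only (a sketch; register numbers hypothetical): for
//
//   %3 = MADDWrrr %0, %1, %wzr    ; a 32-bit MUL is selected as MADD + WZR
//   %4 = ADDWrr %3, %2
//
// getMaddPatterns records MULADDW_OP1, which the combiner may rewrite to
//
//   %4 = MADDWrrr %0, %1, %2
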
/// Floating-Point Support

/// Find instructions that can be turned into madd.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDWrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
      Found = true;
    }
    break;

  case AArch64::FSUBSrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
      Found = true;
    }
    break;
  }
  return Found;
}

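// Illustrative only (a sketch; register numbers hypothetical): under
// unsafe-fp-math, for
//
//   %2 = FMULv2f32 %0, %1
//   %3 = FADDv2f32 %2, %4
//
// getFMAPatterns records FMLAv2f32_OP1, which can be rewritten to the fused
//
//   %3 = FMLAv2f32 %4, %0, %1    ; accumulator operand first, see
//                                ; genFusedMultiply below
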
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool AArch64InstrInfo::isThroughputPattern(
    MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FNMULSUBS_OP1:
  case MachineCombinerPattern::FNMULSUBD_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}

/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the
/// pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}

enum class FMAInstKind { Default, Indexed, Accumulator };

/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B,0
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind of fma instruction (addressing mode) to be generated
/// \param ReplacedAddend is the result register from the instruction
/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default,
                 const unsigned *ReplacedAddend = nullptr) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  unsigned SrcReg2;
  bool Src2IsKill;
  if (ReplacedAddend) {
    // If we just generated a new addend, we must be its only use.
    SrcReg2 = *ReplacedAddend;
    Src2IsKill = true;
  } else {
    SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
    Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
  }

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind \n");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
  InsInstrs.push_back(MIB);
  return MUL;
}

/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
/// \param RC Register class of operands
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
                              const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB =
      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
          .addReg(SrcReg0, getKillRegState(Src0IsKill))
          .addReg(SrcReg1, getKillRegState(Src1IsKill))
          .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}

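// Illustrative only (a sketch; register numbers and the encoded immediate are
// hypothetical): for MULADDWI_OP1 the caller first materializes the ADD
// immediate into a fresh virtual register and then uses genMaddR:
//
//   %3 = MADDWrrr %0, %1, %wzr
//   %4 = ADDWri %3, 16
// ==>
//   %5 = ORRWri %wzr, <encoding of 16>
//   %4 = MADDWrrr %0, %1, %5
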
Sanjay Patelcfe03932015-06-19 23:21:42 +00003935/// When getMachineCombinerPatterns() finds potential patterns,
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00003936/// this function generates the instructions that could replace the
3937/// original code sequence
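///
/// Example (illustrative): for MULADDW_OP1 the rewrite is, in assembly terms,
///   mul  w8, w0, w1
///   add  w0, w8, w2
///   ==> madd w0, w0, w1, w2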
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL = nullptr;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

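    // The ADD may carry a shifted immediate; operand 3 holds the shift
    // amount, so fold it into Imm before trying to encode the value.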
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(Imm, BitSize);
    uint64_t Encoding;
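    // Only combine when the immediate is encodable as a single ORR;
    // otherwise nothing is pushed to InsInstrs and the combiner keeps the
    // original sequence.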
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I,C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I,Imm
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
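    // Negate the immediate before encoding: materializing -Imm with an ORR
    // lets the MADD compute A*B - Imm as -Imm + A*B.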
    uint64_t UImm = SignExtend64(-Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  // Floating Point Support
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDD_OP1:
    // FMUL I=A,B,0
    // FADD R,I,C
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULADDD_OP2:
    // FMUL I=A,B,0
    // FADD R,C,I
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

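  // Vector FMLA/FMLS patterns. The "indexed" forms multiply by a single
  // vector lane and carry the lane index as an extra immediate operand; the
  // plain forms accumulate element-wise. genFusedMultiply is told which
  // flavor to build via FMAInstKind.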
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBD_OP1: {
    // FMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMSUB R,A,B,C // = -C + A*B
    // --- Create(FNMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
      Opc = AArch64::FNMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FNMULSUBS_OP1:
  case MachineCombinerPattern::FNMULSUBD_OP1: {
    // FNMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMADD R,A,B,C // = -A*B - C
    // --- Create(FNMADD);
    if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
      Opc = AArch64::FNMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULSUBD_OP2: {
    // FMUL I=A,B,0
    // FSUB R,C,I
    // ==> FMSUB R,A,B,C (computes C - A*B)
    // --- Create(FMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
      Opc = AArch64::FMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
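  // The FMLS OP1 patterns below have no single fused opcode: the product is
  // added to, not subtracted from, the accumulator. So first negate the
  // subtrahend (Root's second source) with an FNEG into a fresh register,
  // then emit an FMLA onto that result, computing A*B - C as (-C) + A*B.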
  case MachineCombinerPattern::FMLSv2f32_OP1:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
    RC = &AArch64::FPR64RegClass;
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv4f32_OP1:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv2f64_OP1:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion. The immediate patterns above may
  // bail out without creating a replacement, in which case MUL is still
  // null and must not be queued for deletion.
  if (MUL)
    DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}

/// \brief Replace csinc-branch sequence by simple conditional branch
///
/// Examples:
/// 1. \code
///    csinc w9, wzr, wzr, <condition code>
///    tbnz  w9, #0, 0x44
///    \endcode
/// to
///    \code
///    b.<inverted condition code>
///    \endcode
/// 2. \code
///    csinc w9, wzr, wzr, <condition code>
///    tbz   w9, #0, 0x44
///    \endcode
/// to
///    \code
///    b.<condition code>
///    \endcode
///
/// Replace a compare and branch sequence by a TBZ/TBNZ instruction when the
/// compare's constant operand is a power of 2.
///
/// Examples:
///    \code
///    and  w8, w8, #0x400
///    cbnz w8, L1
///    \endcode
/// to
///    \code
///    tbnz w8, #10, L1
///    \endcode
///
/// \param MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI.getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find definition.
  while (DefMI->isCopy()) {
    unsigned CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

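    // The mask has exactly one bit set, so the compare-and-branch can test
    // that bit directly on the AND's input register.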
    MachineOperand &MO = DefMI->getOperand(1);
    unsigned NewReg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // Register lives on to the TBZ/TBNZ now.
    MO.setIsKill(false);

    // Bit positions smaller than 32 must use the 32-bit (W) variant in all
    // cases, since the 64-bit variant cannot encode them. Therefore, if the
    // input register is 64-bit, take its 32-bit sub-register.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
  // Look for CSINC
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}

std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
      {{MOSuppressPair, "aarch64-suppress-pair"},
       {MOStridedAccess, "aarch64-strided-access"}};
  return makeArrayRef(TargetFlags);
}

/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind
/// of frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1                             OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION     I1
/// RET                            I2
///                                RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3                                I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
enum MachineOutlinerClass {
  MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
  MachineOutlinerTailCall, /// Only emit a branch.
  MachineOutlinerNoLRSave  /// Emit a call and return.
};

bool AArch64InstrInfo::canOutlineWithoutLRSave(
    MachineBasicBlock::iterator &CallInsertionPt) const {
  // Was LR saved in the function containing this basic block?
  MachineBasicBlock &MBB = *(CallInsertionPt->getParent());
  LiveRegUnits LRU(getRegisterInfo());
  LRU.addLiveOuts(MBB);

  // Get liveness information from the end of the block to the end of the
  // prospective outlined region.
  std::for_each(MBB.rbegin(),
                (MachineBasicBlock::reverse_iterator)CallInsertionPt,
                [&LRU](MachineInstr &MI) { LRU.stepBackward(MI); });

  // If the link register is available at this point, then we can safely
  // outline the region without saving/restoring LR. Otherwise, we must emit
  // a save and restore.
  return LRU.available(AArch64::LR);
}

AArch64GenInstrInfo::MachineOutlinerInfo
AArch64InstrInfo::getOutlininingCandidateInfo(
    std::vector<
        std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
        &RepeatedSequenceLocs) const {

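  // Start from the conservative default: a call costs a save, a BL, and a
  // restore (3 instructions), and the frame needs one RET.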
  unsigned CallID = MachineOutlinerDefault;
  unsigned FrameID = MachineOutlinerDefault;
  unsigned NumInstrsForCall = 3;
  unsigned NumInstrsToCreateFrame = 1;

  auto DoesntNeedLRSave =
      [this](std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>
                 &I) { return canOutlineWithoutLRSave(I.second); };

  // If the last instruction in any candidate is a terminator, then we should
  // tail call all of the candidates.
  if (RepeatedSequenceLocs[0].second->isTerminator()) {
    CallID = MachineOutlinerTailCall;
    FrameID = MachineOutlinerTailCall;
    NumInstrsForCall = 1;
    NumInstrsToCreateFrame = 0;
  } else if (std::all_of(RepeatedSequenceLocs.begin(),
                         RepeatedSequenceLocs.end(), DoesntNeedLRSave)) {
    CallID = MachineOutlinerNoLRSave;
    FrameID = MachineOutlinerNoLRSave;
    NumInstrsForCall = 1;
    NumInstrsToCreateFrame = 1;
  }

  return MachineOutlinerInfo(NumInstrsForCall, NumInstrsToCreateFrame, CallID,
                             FrameID);
}

bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
  const Function *F = MF.getFunction();

  // Outlining rewrites stack accesses, so only outline from functions that
  // are explicitly marked noredzone; otherwise a red zone below SP could be
  // clobbered.
  if (!F->hasFnAttribute(Attribute::NoRedZone))
    return false;

  // If anyone is using the address of this function, don't outline from it.
  if (F->hasAddressTaken())
    return false;

  // Can F be deduplicated by the linker? If it can, don't outline from it.
  if (!OutlineFromLinkOnceODRs && F->hasLinkOnceODRLinkage())
    return false;

  return true;
}

AArch64GenInstrInfo::MachineOutlinerInstrType
AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {

  MachineFunction *MF = MI.getParent()->getParent();
  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();

  // Don't outline LOHs.
  if (FuncInfo->getLOHRelated().count(&MI))
    return MachineOutlinerInstrType::Illegal;

  // Don't allow debug values to impact outlining type.
  if (MI.isDebugValue() || MI.isIndirectDebugValue())
    return MachineOutlinerInstrType::Invisible;

  // Is this a terminator for a basic block?
  if (MI.isTerminator()) {

    // Is this the end of a function?
    if (MI.getParent()->succ_empty())
      return MachineOutlinerInstrType::Legal;

    // It's not, so don't outline it.
    return MachineOutlinerInstrType::Illegal;
  }

  // Don't outline positions.
  if (MI.isPosition())
    return MachineOutlinerInstrType::Illegal;

  // Don't touch the link register or W30.
  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
      MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    return MachineOutlinerInstrType::Illegal;

  // Make sure none of the operands are un-outlinable.
  for (const MachineOperand &MOP : MI.operands()) {
    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
        MOP.isTargetIndex())
      return MachineOutlinerInstrType::Illegal;

    // Don't outline anything that uses the link register.
    if (MOP.isReg() && getRegisterInfo().regsOverlap(MOP.getReg(), AArch64::LR))
      return MachineOutlinerInstrType::Illegal;
  }

  // Does this use the stack?
  if (MI.modifiesRegister(AArch64::SP, &RI) ||
      MI.readsRegister(AArch64::SP, &RI)) {

    // Is it a memory operation?
    if (MI.mayLoadOrStore()) {
      unsigned Base;  // Filled with the base register of MI.
      int64_t Offset; // Filled with the offset of MI.
      unsigned DummyWidth;

      // Does it allow us to offset the base register and is the base SP?
      if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
          Base != AArch64::SP)
        return MachineOutlinerInstrType::Illegal;

      // Find the minimum/maximum offset for this instruction and check if
      // fixing it up would be in range.
      int64_t MinOffset, MaxOffset;
      unsigned DummyScale;
      getMemOpInfo(MI.getOpcode(), DummyScale, DummyWidth, MinOffset,
                   MaxOffset);

      // TODO: We should really test what happens if an instruction overflows.
      // This is tricky to test with IR tests, but when the outliner is moved
      // to a MIR test, it really ought to be checked.
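      // Outlining the default way inserts a 16-byte LR spill below this
      // access (see fixupPostOutline), hence the checks against Offset + 16.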
      if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset)
        return MachineOutlinerInstrType::Illegal;

      // It's in range, so we can outline it.
      return MachineOutlinerInstrType::Legal;
    }

    // We can't fix it up, so don't outline it.
    return MachineOutlinerInstrType::Illegal;
  }

  return MachineOutlinerInstrType::Legal;
}

void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
  for (MachineInstr &MI : MBB) {
    unsigned Base, Width;
    int64_t Offset;

    // Is this a load or store with an immediate offset with SP as the base?
    if (!MI.mayLoadOrStore() ||
        !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
        Base != AArch64::SP)
      continue;

    // It is, so we have to fix it up.
    unsigned Scale;
    int64_t Dummy1, Dummy2;

    MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
    assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
    getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
    assert(Scale != 0 && "Unexpected opcode!");

    // We've pushed the return address to the stack, so add 16 to the offset.
    // This is safe, since we already checked if it would overflow when we
    // checked if this instruction was legal to outline.
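    // For example (illustrative): "ldr x0, [sp, #8]" has Scale 8 and encoded
    // immediate 1; after the LR spill it must become "ldr x0, [sp, #24]",
    // i.e. an encoded immediate of (8 + 16) / 8 == 3.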
    int64_t NewImm = (Offset + 16) / Scale;
    StackOffsetOperand.setImm(NewImm);
  }
}

void AArch64InstrInfo::insertOutlinerEpilogue(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const MachineOutlinerInfo &MInfo) const {

  // If this is a tail call outlined function, then there's already a return.
  if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
    return;

  // It's not a tail call, so we have to insert the return ourselves.
  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
                          .addReg(AArch64::LR, RegState::Undef);
  MBB.insert(MBB.end(), ret);

  // Did we have to modify the stack by saving the link register?
  if (MInfo.FrameConstructionID == MachineOutlinerNoLRSave)
    return;

  // We modified the stack.
  // Walk over the basic block and fix up all the stack accesses.
  fixupPostOutline(MBB);
}

void AArch64InstrInfo::insertOutlinerPrologue(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const MachineOutlinerInfo &MInfo) const {}

MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    MachineFunction &MF, const MachineOutlinerInfo &MInfo) const {

  // Are we tail calling?
  if (MInfo.CallConstructionID == MachineOutlinerTailCall) {
    // If yes, then we can just branch to the label.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::B))
                            .addGlobalAddress(M.getNamedValue(MF.getName())));
    return It;
  }

  // Are we saving the link register?
  if (MInfo.CallConstructionID == MachineOutlinerNoLRSave) {
    // No, so just insert the call.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                            .addGlobalAddress(M.getNamedValue(MF.getName())));
    return It;
  }

  // We have a default call. Save the link register.
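  // The emitted sequence is:
  //   str x30, [sp, #-16]!   ; spill LR with a pre-indexed store
  //   bl  OUTLINED_FUNCTION
  //   ldr x30, [sp], #16     ; reload LR with a post-indexed load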
  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
                              .addReg(AArch64::SP, RegState::Define)
                              .addReg(AArch64::LR)
                              .addReg(AArch64::SP)
                              .addImm(-16);
  It = MBB.insert(It, STRXpre);
  It++;

  // Insert the call.
  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                          .addGlobalAddress(M.getNamedValue(MF.getName())));

  It++;

  // Restore the link register.
  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                               .addReg(AArch64::SP, RegState::Define)
                               .addReg(AArch64::LR, RegState::Define)
                               .addReg(AArch64::SP)
                               .addImm(16);
  It = MBB.insert(It, LDRXpost);

  return It;
}