//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
56
Jessica Paquette809d7082017-07-28 03:21:58 +000057static cl::opt<unsigned> TBZDisplacementBits(
58 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
59 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
60
61static cl::opt<unsigned> CBZDisplacementBits(
62 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
63 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
Matt Arsenaulte8da1452016-08-02 08:06:17 +000064
65static cl::opt<unsigned>
Jessica Paquette809d7082017-07-28 03:21:58 +000066 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
67 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
Matt Arsenaulte8da1452016-08-02 08:06:17 +000068
Tim Northover3b0846e2014-05-24 12:50:23 +000069AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
70 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
Eric Christophera0de2532015-03-18 20:37:30 +000071 RI(STI.getTargetTriple()), Subtarget(STI) {}

/// Return the number of bytes of code the specified instruction may occupy.
/// This returns the maximum (worst-case) number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  // before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    NumBytes = 16;
    break;
  }

  return NumBytes;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}
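
// For reference: the Cond vector built above uses a small ad-hoc encoding
// (inferred from this file's own consumers, not a documented LLVM-wide
// contract):
//   Cond = [cc]                    for Bcc     (size 1)
//   Cond = [-1, opcode, reg]       for CB[N]Z  (size 3)
//   Cond = [-1, opcode, reg, bit]  for TB[N]Z  (size 4)
// The leading -1 marks a folded compare-and-branch; reverseBranchCondition()
// and insertSelect() below dispatch on Cond[0] and Cond.size() respectively.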
147
Matt Arsenaulte8da1452016-08-02 08:06:17 +0000148static unsigned getBranchDisplacementBits(unsigned Opc) {
149 switch (Opc) {
150 default:
151 llvm_unreachable("unexpected opcode!");
Matt Arsenault0a3ea892016-10-06 15:38:09 +0000152 case AArch64::B:
153 return 64;
Matt Arsenaulte8da1452016-08-02 08:06:17 +0000154 case AArch64::TBNZW:
155 case AArch64::TBZW:
156 case AArch64::TBNZX:
157 case AArch64::TBZX:
158 return TBZDisplacementBits;
159 case AArch64::CBNZW:
160 case AArch64::CBZW:
161 case AArch64::CBNZX:
162 case AArch64::CBZX:
163 return CBZDisplacementBits;
164 case AArch64::Bcc:
165 return BCCDisplacementBits;
166 }
167}
168
bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}
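
// For orientation (byte offsets are divided by 4 above because every AArch64
// instruction is 4 bytes): a 19-bit displacement (Bcc, CB[N]Z) reaches
// +/-2^18 * 4 bytes = +/-1 MiB, and a 14-bit displacement (TB[N]Z) reaches
// +/-2^13 * 4 bytes = +/-32 KiB. The cl::opt defaults above match these
// architectural limits but can be narrowed to exercise branch relaxation.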

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now that the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
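
// Return convention: false means the terminators were understood and
// TBB/FBB/Cond were filled in; true means "cannot analyze". For example, a
// block ending in "cbz w0, %bb.2; b %bb.3" yields TBB = %bb.2, FBB = %bb.3,
// and Cond = [-1, CBZW, w0].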

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc.
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}
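
// For example, reversing Cond = [-1, CBZW, reg] yields [-1, CBNZW, reg],
// while a plain Bcc condition is inverted through getInvertedCondCode()
// (EQ <-> NE, LT <-> GE, and so on).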
326
Matt Arsenault1b9fc8e2016-09-14 20:43:16 +0000327unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000328 int *BytesRemoved) const {
Benjamin Kramere61cbd12015-06-25 13:28:24 +0000329 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
330 if (I == MBB.end())
Tim Northover3b0846e2014-05-24 12:50:23 +0000331 return 0;
Benjamin Kramere61cbd12015-06-25 13:28:24 +0000332
Tim Northover3b0846e2014-05-24 12:50:23 +0000333 if (!isUncondBranchOpcode(I->getOpcode()) &&
334 !isCondBranchOpcode(I->getOpcode()))
335 return 0;
336
337 // Remove the branch.
338 I->eraseFromParent();
339
340 I = MBB.end();
341
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000342 if (I == MBB.begin()) {
343 if (BytesRemoved)
344 *BytesRemoved = 4;
Tim Northover3b0846e2014-05-24 12:50:23 +0000345 return 1;
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000346 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000347 --I;
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000348 if (!isCondBranchOpcode(I->getOpcode())) {
349 if (BytesRemoved)
350 *BytesRemoved = 4;
Tim Northover3b0846e2014-05-24 12:50:23 +0000351 return 1;
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000352 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000353
354 // Remove the branch.
355 I->eraseFromParent();
Matt Arsenaulta2b036e2016-09-14 17:23:48 +0000356 if (BytesRemoved)
357 *BytesRemoved = 8;
358
Tim Northover3b0846e2014-05-24 12:50:23 +0000359 return 2;
360}
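
// The return value is the number of branches removed (at most a conditional
// plus an unconditional one); since every AArch64 branch is 4 bytes,
// BytesRemoved is always 4 times that count.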

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc.
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch.
    // Note that we use add() instead of addReg() to keep the operand flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // Fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // Fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}
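
// A concrete instance of the fold above, in the operand order used by
// insertSelect() below (illustrative pseudo-MIR):
//   %t = ADDWri %x, 1, 0          ; %t = %x + 1
//   %d = select cc, %t, %y
// canFoldIntoCSel() returns CSINCWr with *NewVReg = %x, and the caller emits
//   %d = CSINCWr %y, %x, !cc      ; cc ? %x + 1 : %y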

bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       unsigned TrueReg, unsigned FalseReg,
                                       int &CondCycles, int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv, and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}
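
// For example, 0x00ff00ff00ff00ff is a repeating bitmask and thus a valid
// logical immediate, so a MOVi64imm of that constant can become a single
// "orr x0, xzr, #0x00ff00ff00ff00ff"; an arbitrary constant like 0x12345 is
// not encodable and needs a MOVZ/MOVK sequence instead.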

// FIXME: This implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in the future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
      return true;
    else
      return MI.isAsCheapAsAMove();
  }

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub with an immediate and no shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has the
  // ZeroCycleZeroing feature.
  case AArch64::FMOVH0:
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
742
Evandro Menezes9f9daa12018-01-30 15:40:16 +0000743bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
Evandro Menezes1515e852018-03-15 20:31:13 +0000744 unsigned Reg, Imm, Shift;
745
Evandro Menezes9f9daa12018-01-30 15:40:16 +0000746 switch (MI.getOpcode()) {
747 default:
748 return false;
749
Evandro Menezes1515e852018-03-15 20:31:13 +0000750 // MOV Rd, SP
751 case AArch64::ADDWri:
752 case AArch64::ADDXri:
753 if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
754 return false;
755
756 Reg = MI.getOperand(1).getReg();
757 Imm = MI.getOperand(2).getImm();
758 return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
759
760 // Literal
Evandro Menezes9f9daa12018-01-30 15:40:16 +0000761 case AArch64::ADR:
762 case AArch64::ADRP:
Evandro Menezes1515e852018-03-15 20:31:13 +0000763 return true;
Evandro Menezes9f9daa12018-01-30 15:40:16 +0000764
Evandro Menezes1515e852018-03-15 20:31:13 +0000765 // MOVI Vd, #0
766 case AArch64::MOVID:
767 case AArch64::MOVIv8b_ns:
768 case AArch64::MOVIv2d_ns:
769 case AArch64::MOVIv16b_ns:
770 Imm = MI.getOperand(1).getImm();
771 return (Imm == 0);
772
773 // MOVI Vd, #0
774 case AArch64::MOVIv2i32:
775 case AArch64::MOVIv4i16:
776 case AArch64::MOVIv4i32:
777 case AArch64::MOVIv8i16:
778 Imm = MI.getOperand(1).getImm();
779 Shift = MI.getOperand(2).getImm();
780 return (Imm == 0 && Shift == 0);
781
782 // MOV Rd, Imm
Evandro Menezes9f9daa12018-01-30 15:40:16 +0000783 case AArch64::MOVNWi:
784 case AArch64::MOVNXi:
Evandro Menezes1515e852018-03-15 20:31:13 +0000785
786 // MOV Rd, Imm
Evandro Menezes9f9daa12018-01-30 15:40:16 +0000787 case AArch64::MOVZWi:
788 case AArch64::MOVZXi:
789 return true;
790
Evandro Menezes1515e852018-03-15 20:31:13 +0000791 // MOV Rd, Imm
792 case AArch64::ORRWri:
793 case AArch64::ORRXri:
794 if (!MI.getOperand(1).isReg())
795 return false;
Evandro Menezes9f9daa12018-01-30 15:40:16 +0000796
Evandro Menezes1515e852018-03-15 20:31:13 +0000797 Reg = MI.getOperand(1).getReg();
798 Imm = MI.getOperand(2).getImm();
799 return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
800
801 // MOV Rd, Rm
802 case AArch64::ORRWrs:
803 case AArch64::ORRXrs:
804 if (!MI.getOperand(1).isReg())
805 return false;
806
807 Reg = MI.getOperand(1).getReg();
808 Imm = MI.getOperand(3).getImm();
809 Shift = AArch64_AM::getShiftValue(Imm);
810 return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
Evandro Menezes9f9daa12018-01-30 15:40:16 +0000811 }
812}

bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
  unsigned Imm, Shift;
  AArch64_AM::ShiftExtendType Ext;

  switch (MI.getOpcode()) {
  default:
    return false;

  // WriteI
  case AArch64::ADDSWri:
  case AArch64::ADDSXri:
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return true;

  // WriteISReg
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getShiftValue(Imm);
    Ext = AArch64_AM::getShiftType(Imm);
    return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));

  // WriteIEReg
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getArithShiftValue(Imm);
    Ext = AArch64_AM::getArithExtendType(Imm);
    return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));

  case AArch64::PRFMroW:
  case AArch64::PRFMroX:

  // WriteLDIdx
  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:

  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:

  // WriteSTIdx
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX:

  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
    Imm = MI.getOperand(3).getImm();
    Ext = AArch64_AM::getMemExtendType(Imm);
    return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
  }
}

bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}
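
// For example, "sxtw x0, w1" is represented as "SBFMXri %x0, %x1, 0, 31",
// so the coalescer may treat the low 32 bits of x0 (sub_32) as a copy of w1.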

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}
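
// For example, "ldr x0, [x1]" (offset 0, width 8) and "str x2, [x1, #8]"
// (offset 8, width 8) share base register x1 and satisfy 0 + 8 <= 8, so they
// are reported disjoint. Accesses whose base registers differ are
// conservatively assumed to alias, since the bases might be equal at runtime.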

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // These may later be rewritten to their non-flag-setting forms (e.g.
    // SUBSWrr -> SUBWrr) by optimizeCompareInstr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: CmpValue is reduced to 0 or 1 here, since only comparison with
    // zero is supported downstream.
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in optimizeCompareInstr.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}
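
// The zero-register caveat above: in the immediate forms, destination
// register number 31 encodes SP rather than ZR, so e.g. "subs wzr, w0, #1"
// (i.e. "cmp w0, #1") must keep its flag-setting opcode; rewriting it to
// SUBWri would silently retarget the destination to wsp.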

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting at \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare
/// instruction only when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above it there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in the
  // analyzeCompare function.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get the opcode of the S (flag-setting) version of Instr.
/// If Instr is already an S version, its own opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}
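
// For example (illustrative): sForm() maps AArch64::ADDWrr to
// AArch64::ADDSWrr, returns AArch64::ADDSWrr unchanged, and returns
// AArch64::INSTRUCTION_LIST_END for an opcode with no flag-setting form,
// such as a multiply-add.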

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}
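
// For example (illustrative): getUsedNZCV(AArch64CC::HI) reports Z and C as
// used, while getUsedNZCV(AArch64CC::EQ) reports only Z. This is what lets
// the compare elimination below tolerate differences in how C and V would be
// produced, as long as nothing after the compare reads them.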

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr
///        or if MI opcode is not the S form there must be neither defs of
///        flags nor uses of flags between MI and CmpInstr.
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}
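
// For example (an illustrative sketch; registers and labels are arbitrary):
//   sub  w1, w0, #3
//   ...               ; no other NZCV defs or uses in between
//   cmp  w1, #0       ; i.e. SUBS wzr, w1, #0
//   b.eq <bb>
// becomes:
//   subs w1, w0, #3
//   ...
//   b.eq <bb>
// This is only legal because EQ reads Z alone; a reader of C or V after the
// compare (e.g. b.hs) would block the substitution.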

bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}
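
// For example (an illustrative sketch; the guard symbol name is typical, not
// mandated by this code), in the small code model the pseudo expands to
// roughly:
//   adrp x0, __stack_chk_guard
//   ldr  x0, [x0, :lo12:__stack_chk_guard]
// and in the large code model to a movz/movk/movk/movk sequence
// materializing the 64-bit address, followed by "ldr x0, [x0]".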

/// Return true if this instruction has a shifted-register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}
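
// For example (illustrative): "add w0, w1, w2, lsl #3" (ADDWrs with a
// non-zero shift immediate) returns true, while the plain register form
// "add w0, w1, w2" is an ADDWrs with a zero shift and returns false.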

/// Return true if this instruction has an extended-register operand with a
/// non-zero extend/shift encoding.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}
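
// For example (illustrative): "add x0, x1, w2, sxtw #2" (ADDXrx) returns
// true because its extend operand encodes to a non-zero value.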

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    unsigned Val = MI.getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  return false;
}
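
// For example (illustrative): "ldr x0, [x1, x2, lsl #3]" and
// "ldr w0, [x1, w2, sxtw]" both scale or extend the register offset and
// return true; "ldr x0, [x1, x2]" (UXTX with no shift) returns false.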

/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}

/// Check all MachineMemOperands for a hint that the load/store is strided.
bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOStridedAccess;
  });
}

bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}
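
// For example (illustrative): LDURXi ("ldur x0, [x1, #-8]", signed byte
// offset in [-256, 255]) is unscaled, while its scaled counterpart LDRXui
// ("ldr x0, [x1, #8]", unsigned offset in multiples of 8) is not.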

bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRSWui:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
    return true;
  }
}

unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
                                                   bool &Is64Bit) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no flag setting equivalent!");
  // 32-bit cases:
  case AArch64::ADDWri:
    Is64Bit = false;
    return AArch64::ADDSWri;
  case AArch64::ADDWrr:
    Is64Bit = false;
    return AArch64::ADDSWrr;
  case AArch64::ADDWrs:
    Is64Bit = false;
    return AArch64::ADDSWrs;
  case AArch64::ADDWrx:
    Is64Bit = false;
    return AArch64::ADDSWrx;
  case AArch64::ANDWri:
    Is64Bit = false;
    return AArch64::ANDSWri;
  case AArch64::ANDWrr:
    Is64Bit = false;
    return AArch64::ANDSWrr;
  case AArch64::ANDWrs:
    Is64Bit = false;
    return AArch64::ANDSWrs;
  case AArch64::BICWrr:
    Is64Bit = false;
    return AArch64::BICSWrr;
  case AArch64::BICWrs:
    Is64Bit = false;
    return AArch64::BICSWrs;
  case AArch64::SUBWri:
    Is64Bit = false;
    return AArch64::SUBSWri;
  case AArch64::SUBWrr:
    Is64Bit = false;
    return AArch64::SUBSWrr;
  case AArch64::SUBWrs:
    Is64Bit = false;
    return AArch64::SUBSWrs;
  case AArch64::SUBWrx:
    Is64Bit = false;
    return AArch64::SUBSWrx;
  // 64-bit cases:
  case AArch64::ADDXri:
    Is64Bit = true;
    return AArch64::ADDSXri;
  case AArch64::ADDXrr:
    Is64Bit = true;
    return AArch64::ADDSXrr;
  case AArch64::ADDXrs:
    Is64Bit = true;
    return AArch64::ADDSXrs;
  case AArch64::ADDXrx:
    Is64Bit = true;
    return AArch64::ADDSXrx;
  case AArch64::ANDXri:
    Is64Bit = true;
    return AArch64::ANDSXri;
  case AArch64::ANDXrr:
    Is64Bit = true;
    return AArch64::ANDSXrr;
  case AArch64::ANDXrs:
    Is64Bit = true;
    return AArch64::ANDSXrs;
  case AArch64::BICXrr:
    Is64Bit = true;
    return AArch64::BICSXrr;
  case AArch64::BICXrs:
    Is64Bit = true;
    return AArch64::BICSXrs;
  case AArch64::SUBXri:
    Is64Bit = true;
    return AArch64::SUBSXri;
  case AArch64::SUBXrr:
    Is64Bit = true;
    return AArch64::SUBSXrr;
  case AArch64::SUBXrs:
    Is64Bit = true;
    return AArch64::SUBSXrs;
  case AArch64::SUBXrx:
    Is64Bit = true;
    return AArch64::SUBSXrx;
  }
}
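
// Example use (an illustrative sketch):
//   bool Is64Bit;
//   unsigned NewOpc =
//       AArch64InstrInfo::convertToFlagSettingOpc(AArch64::ADDWri, Is64Bit);
//   // Now NewOpc == AArch64::ADDSWri and Is64Bit == false.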

// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm (as opposed to an address reloc).
  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
  if (!MI.getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  unsigned BaseReg = MI.getOperand(1).getReg();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI.modifiesRegister(BaseReg, TRI))
    return false;

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.isPaired128Slow()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    const TargetRegisterInfo *TRI) const {
  unsigned Width;
  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Get the scaling factor for the instruction and set the width for the
  // instruction.
  unsigned Scale = 0;
  int64_t Dummy1, Dummy2;

  // If this returns false, then it's an instruction we don't want to handle.
  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    return false;

  // Compute the offset. Offset is calculated as the immediate operand
  // multiplied by the scaling factor. Unscaled instructions have scaling
  // factor set to 1.
  if (LdSt.getNumExplicitOperands() == 3) {
    BaseReg = LdSt.getOperand(1).getReg();
    Offset = LdSt.getOperand(2).getImm() * Scale;
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseReg = LdSt.getOperand(2).getReg();
    Offset = LdSt.getOperand(3).getImm() * Scale;
  }
  return true;
}

MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
  return OfsOp;
}

bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
                                    unsigned &Width, int64_t &MinOffset,
                                    int64_t &MaxOffset) const {
  switch (Opcode) {
  // Not a memory operation or something we want to handle.
  default:
    Scale = Width = 0;
    MinOffset = MaxOffset = 0;
    return false;
  case AArch64::STRWpost:
  case AArch64::LDRWpost:
    Width = 32;
    Scale = 4;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    Scale = 16;
    Width = 32;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    Scale = 8;
    Width = 16;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    Scale = Width = 8;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    Scale = 4;
    Width = 8;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = Width = 4;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  }

  return true;
}
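
// For example (illustrative): getMemOpInfo(AArch64::LDRXui, ...) reports
// Scale = Width = 8 with an immediate range of [0, 4095], i.e. byte offsets
// 0..32760 in steps of 8, while getMemOpInfo(AArch64::LDURXi, ...) reports
// Scale = 1, Width = 8 and a byte-offset range of [-256, 255].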

// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
  unsigned OffsetStride = 1;
  switch (Opc) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    OffsetStride = 16;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    OffsetStride = 8;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    OffsetStride = 4;
    break;
  }
  // If the byte-offset isn't a multiple of the stride, we can't scale this
  // offset.
  if (Offset % OffsetStride != 0)
    return false;

  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  Offset /= OffsetStride;
  return true;
}
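
// For example (illustrative): for LDURXi (OffsetStride == 8) a byte offset
// of 16 scales to element offset 2, whereas a byte offset of 12 is not a
// multiple of 8, so scaleOffset() returns false and no pair is formed.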

static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
  if (FirstOpc == SecondOpc)
    return true;
  // We can also pair sign-ext and zero-ext instructions.
  switch (FirstOpc) {
  default:
    return false;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
  }
  // These instructions can't be paired based on their opcodes.
  return false;
}

/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
                                           unsigned BaseReg1,
                                           MachineInstr &SecondLdSt,
                                           unsigned BaseReg2,
                                           unsigned NumLoads) const {
  if (BaseReg1 != BaseReg2)
    return false;

  // Only cluster up to a single pair.
  if (NumLoads > 1)
    return false;

  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
    return false;

  // Can we pair these instructions based on their opcodes?
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  if (!canPairLdStOpc(FirstOpc, SecondOpc))
    return false;

  // Can't merge volatiles or load/stores that have a hint to avoid pair
  // formation, for example.
  if (!isCandidateToMergeOrPair(FirstLdSt) ||
      !isCandidateToMergeOrPair(SecondLdSt))
    return false;

  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    return false;

  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    return false;

  // Pairwise instructions have a 7-bit signed offset field.
  if (Offset1 > 63 || Offset1 < -64)
    return false;

  // The caller should already have ordered First/SecondLdSt by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  return Offset1 + 1 == Offset2;
}
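
// For example (an illustrative sketch; registers are arbitrary):
//   ldr x1, [x0, #8]      ; element offset 1
//   ldr x2, [x0, #16]     ; element offset 2
// share a base register and have adjacent element offsets, so they are
// clustered and can later be merged into "ldp x1, x2, [x0, #8]".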

static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here, that happens to be
  // easily obtainable with a mask.
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}
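
// For example (illustrative): copying the D-register pair {d1,d2} into
// {d2,d3} in forward order would overwrite d2 before it is read, and indeed
// forwardCopyWillClobberTuple(2, 1, 2) == true since ((2 - 1) & 0x1f) < 2;
// copyPhysRegTuple below therefore copies such tuples in reverse order.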

void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I,
                                        const DebugLoc &DL, unsigned DestReg,
                                        unsigned SrcReg, bool KillSrc,
                                        unsigned Opcode,
                                        ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}
2450
2451void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00002452 MachineBasicBlock::iterator I,
2453 const DebugLoc &DL, unsigned DestReg,
2454 unsigned SrcReg, bool KillSrc) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00002455 if (AArch64::GPR32spRegClass.contains(DestReg) &&
2456 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
Eric Christophera0de2532015-03-18 20:37:30 +00002457 const TargetRegisterInfo *TRI = &getRegisterInfo();
2458
Tim Northover3b0846e2014-05-24 12:50:23 +00002459 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2460 // If either operand is WSP, expand to ADD #0.
2461 if (Subtarget.hasZeroCycleRegMove()) {
2462 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2463 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2464 &AArch64::GPR64spRegClass);
2465 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2466 &AArch64::GPR64spRegClass);
2467 // This instruction is reading and writing X registers. This may upset
2468 // the register scavenger and machine verifier, so we need to indicate
2469 // that we are reading an undefined value from SrcRegX, but a proper
2470 // value from SrcReg.
2471 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2472 .addReg(SrcRegX, RegState::Undef)
2473 .addImm(0)
2474 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2475 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2476 } else {
2477 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2478 .addReg(SrcReg, getKillRegState(KillSrc))
2479 .addImm(0)
2480 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2481 }
2482 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002483 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2484 .addImm(0)
2485 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
Tim Northover3b0846e2014-05-24 12:50:23 +00002486 } else {
2487 if (Subtarget.hasZeroCycleRegMove()) {
2488 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2489 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2490 &AArch64::GPR64spRegClass);
2491 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2492 &AArch64::GPR64spRegClass);
2493 // This instruction is reading and writing X registers. This may upset
2494 // the register scavenger and machine verifier, so we need to indicate
2495 // that we are reading an undefined value from SrcRegX, but a proper
2496 // value from SrcReg.
2497 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2498 .addReg(AArch64::XZR)
2499 .addReg(SrcRegX, RegState::Undef)
2500 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2501 } else {
2502 // Otherwise, expand to ORR WZR.
2503 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2504 .addReg(AArch64::WZR)
2505 .addReg(SrcReg, getKillRegState(KillSrc));
2506 }
2507 }
2508 return;
2509 }
2510
2511 if (AArch64::GPR64spRegClass.contains(DestReg) &&
2512 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2513 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2514 // If either operand is SP, expand to ADD #0.
2515 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2516 .addReg(SrcReg, getKillRegState(KillSrc))
2517 .addImm(0)
2518 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2519 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002520 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2521 .addImm(0)
2522 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
Tim Northover3b0846e2014-05-24 12:50:23 +00002523 } else {
2524 // Otherwise, expand to ORR XZR.
2525 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2526 .addReg(AArch64::XZR)
2527 .addReg(SrcReg, getKillRegState(KillSrc));
2528 }
2529 return;
2530 }
2531
2532 // Copy a DDDD register quad by copying the individual sub-registers.
2533 if (AArch64::DDDDRegClass.contains(DestReg) &&
2534 AArch64::DDDDRegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002535 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2536 AArch64::dsub2, AArch64::dsub3};
Tim Northover3b0846e2014-05-24 12:50:23 +00002537 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2538 Indices);
2539 return;
2540 }
2541
2542 // Copy a DDD register triple by copying the individual sub-registers.
2543 if (AArch64::DDDRegClass.contains(DestReg) &&
2544 AArch64::DDDRegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002545 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2546 AArch64::dsub2};
Tim Northover3b0846e2014-05-24 12:50:23 +00002547 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2548 Indices);
2549 return;
2550 }
2551
2552 // Copy a DD register pair by copying the individual sub-registers.
2553 if (AArch64::DDRegClass.contains(DestReg) &&
2554 AArch64::DDRegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002555 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
Tim Northover3b0846e2014-05-24 12:50:23 +00002556 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2557 Indices);
2558 return;
2559 }
2560
2561 // Copy a QQQQ register quad by copying the individual sub-registers.
2562 if (AArch64::QQQQRegClass.contains(DestReg) &&
2563 AArch64::QQQQRegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002564 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2565 AArch64::qsub2, AArch64::qsub3};
Tim Northover3b0846e2014-05-24 12:50:23 +00002566 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2567 Indices);
2568 return;
2569 }
2570
2571 // Copy a QQQ register triple by copying the individual sub-registers.
2572 if (AArch64::QQQRegClass.contains(DestReg) &&
2573 AArch64::QQQRegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002574 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2575 AArch64::qsub2};
Tim Northover3b0846e2014-05-24 12:50:23 +00002576 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2577 Indices);
2578 return;
2579 }
2580
2581 // Copy a QQ register pair by copying the individual sub-registers.
2582 if (AArch64::QQRegClass.contains(DestReg) &&
2583 AArch64::QQRegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002584 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
Tim Northover3b0846e2014-05-24 12:50:23 +00002585 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2586 Indices);
2587 return;
2588 }
2589
2590 if (AArch64::FPR128RegClass.contains(DestReg) &&
2591 AArch64::FPR128RegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002592 if (Subtarget.hasNEON()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00002593 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2594 .addReg(SrcReg)
2595 .addReg(SrcReg, getKillRegState(KillSrc));
2596 } else {
2597 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
Jessica Paquette809d7082017-07-28 03:21:58 +00002598 .addReg(AArch64::SP, RegState::Define)
2599 .addReg(SrcReg, getKillRegState(KillSrc))
2600 .addReg(AArch64::SP)
2601 .addImm(-16);
Tim Northover3b0846e2014-05-24 12:50:23 +00002602 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
Jessica Paquette809d7082017-07-28 03:21:58 +00002603 .addReg(AArch64::SP, RegState::Define)
2604 .addReg(DestReg, RegState::Define)
2605 .addReg(AArch64::SP)
2606 .addImm(16);
Tim Northover3b0846e2014-05-24 12:50:23 +00002607 }
2608 return;
2609 }
2610
2611 if (AArch64::FPR64RegClass.contains(DestReg) &&
2612 AArch64::FPR64RegClass.contains(SrcReg)) {
Jessica Paquette809d7082017-07-28 03:21:58 +00002613 if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copies between GPR64 and FPR64.
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::GPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  // Copies between GPR32 and FPR32.
  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::GPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

  llvm_unreachable("unimplemented reg-to-reg copy");
}

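// Spill SrcReg to stack slot FI, choosing the store opcode by the spill size
// of RC. Register tuples with no single store instruction are spilled with
// ST1 variants, which cannot encode an immediate offset.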
void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
          .addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
                  getKillRegState(isKill))
          .addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
                  getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      return;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

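// Reload DestReg from stack slot FI. Opcode selection mirrors
// storeRegToStackSlot above.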
void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
          .addReg(TRI->getSubReg(DestReg, AArch64::sube64),
                  getDefRegState(true))
          .addReg(TRI->getSubReg(DestReg, AArch64::subo64),
                  getDefRegState(true))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      return;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

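// Materialize DestReg = SrcReg + Offset (SrcReg - Offset for negative
// offsets). Offsets too large for the 12-bit ADD/SUB immediate are peeled
// off in chunks of up to 0xfff << 12 using the shifted-immediate form.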
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned DestReg, unsigned SrcReg, int Offset,
                           const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV) {
  if (DestReg == SrcReg && Offset == 0)
    return;

  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
         "SP increment/decrement not 16-byte aligned");

  bool isSub = Offset < 0;
  if (isSub)
    Offset = -Offset;

  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
  // register can be loaded with offset%8 and the add/sub can use an extending
  // instruction with LSL#3.
  // Currently the function handles any offsets but generates a poor sequence
  // of code.
  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  unsigned Opc;
  if (SetNZCV)
    Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
  else
    Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
  const unsigned MaxEncoding = 0xfff;
  const unsigned ShiftSize = 12;
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  while (((unsigned)Offset) >= (1 << ShiftSize)) {
    unsigned ThisVal;
    if (((unsigned)Offset) > MaxEncodableValue) {
      ThisVal = MaxEncodableValue;
    } else {
      ThisVal = Offset & MaxEncodableValue;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");
    BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
        .addReg(SrcReg)
        .addImm(ThisVal >> ShiftSize)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
        .setMIFlag(Flag);

    SrcReg = DestReg;
    Offset -= ThisVal;
    if (Offset == 0)
      return;
  }
  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
      .addReg(SrcReg)
      .addImm(Offset)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlag(Flag);
}

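// Target hook for TargetInstrInfo::foldMemoryOperand: try to replace MI
// (here, a COPY) with a direct spill or fill of the stack slot. Returns the
// new instruction, or nullptr when nothing can be folded.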
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex,
    LiveIntervals *LIS) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %0 = COPY %sp; GPR64all:%0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %0 register class here.
  //
  // <rdar://problem/11522048>
  //
  if (MI.isFullCopy()) {
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned SrcReg = MI.getOperand(1).getReg();
    if (SrcReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Handle the case where a copy is being spilled or filled but the source
  // and destination register classes don't match. For example:
  //
  //   %0 = COPY %xzr; GPR64common:%0
  //
  // In this case we can still safely fold away the COPY and generate the
  // following spill code:
  //
  //   STRXui %xzr, %stack.0
  //
  // This also eliminates spilled cross register class COPYs (e.g. between x
  // and d regs) of the same size. For example:
  //
  //   %0 = COPY %1; GPR64:%0, FPR64:%1
  //
  // will be filled as
  //
  //   LDRDui %0, fi<#0>
  //
  // instead of
  //
  //   LDRXui %Temp, fi<#0>
  //   %0 = FMOV %Temp
  //
  if (MI.isCopy() && Ops.size() == 1 &&
      // Make sure we're only folding the explicit COPY defs/uses.
      (Ops[0] == 0 || Ops[0] == 1)) {
    bool IsSpill = Ops[0] == 0;
    bool IsFill = !IsSpill;
    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    MachineBasicBlock &MBB = *MI.getParent();
    const MachineOperand &DstMO = MI.getOperand(0);
    const MachineOperand &SrcMO = MI.getOperand(1);
    unsigned DstReg = DstMO.getReg();
    unsigned SrcReg = SrcMO.getReg();
    // This is slightly expensive to compute for physical regs since
    // getMinimalPhysRegClass is slow.
    auto getRegClass = [&](unsigned Reg) {
      return TargetRegisterInfo::isVirtualRegister(Reg)
                 ? MRI.getRegClass(Reg)
                 : TRI.getMinimalPhysRegClass(Reg);
    };

    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
      assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
             "Mismatched register size in non subreg COPY");
      if (IsSpill)
        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
                            getRegClass(SrcReg), &TRI);
      else
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
                             getRegClass(DstReg), &TRI);
      return &*--InsertPt;
    }

    // Handle cases like spilling def of:
    //
    //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
    //
    // where the physical register source can be widened and stored to the full
    // virtual reg destination stack slot, in this case producing:
    //
    //   STRXui %xzr, %stack.0
    //
    if (IsSpill && DstMO.isUndef() &&
        TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
      assert(SrcMO.getSubReg() == 0 &&
             "Unexpected subreg on physical register");
      const TargetRegisterClass *SpillRC;
      unsigned SpillSubreg;
      switch (DstMO.getSubReg()) {
      default:
        SpillRC = nullptr;
        break;
      case AArch64::sub_32:
      case AArch64::ssub:
        if (AArch64::GPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::GPR64RegClass;
          SpillSubreg = AArch64::sub_32;
        } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR64RegClass;
          SpillSubreg = AArch64::ssub;
        } else
          SpillRC = nullptr;
        break;
      case AArch64::dsub:
        if (AArch64::FPR64RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR128RegClass;
          SpillSubreg = AArch64::dsub;
        } else
          SpillRC = nullptr;
        break;
      }

      if (SpillRC)
        if (unsigned WidenedSrcReg =
                TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
          storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
                              FrameIndex, SpillRC, &TRI);
          return &*--InsertPt;
        }
    }

    // Handle cases like filling use of:
    //
    //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
    //
    // where we can load the full virtual reg source stack slot, into the
    // subreg destination, in this case producing:
    //
    //   LDRWui %0:sub_32<def,read-undef>, %stack.0
    //
    if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
      const TargetRegisterClass *FillRC;
      switch (DstMO.getSubReg()) {
      default:
        FillRC = nullptr;
        break;
      case AArch64::sub_32:
        FillRC = &AArch64::GPR32RegClass;
        break;
      case AArch64::ssub:
        FillRC = &AArch64::FPR32RegClass;
        break;
      case AArch64::dsub:
        FillRC = &AArch64::FPR64RegClass;
        break;
      }

      if (FillRC) {
        assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
                   TRI.getRegSizeInBits(*FillRC) &&
               "Mismatched regclass size on folded subreg COPY");
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
        MachineInstr &LoadMI = *--InsertPt;
        MachineOperand &LoadDst = LoadMI.getOperand(0);
        assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
        LoadDst.setSubReg(DstMO.getSubReg());
        LoadDst.setIsUndef();
        return &LoadMI;
      }
    }
  }

  // Cannot fold.
  return nullptr;
}

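// Check whether MI's frame offset can be encoded directly. Reports the
// directly-emittable part of the offset, whether the unscaled variant of the
// opcode must be used (and which opcode that is), and leaves any remainder
// in Offset for the caller to materialize separately.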
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int *EmittableOffset) {
  int Scale = 1;
  bool IsSigned = false;
  // The ImmIdx should be changed case by case if it is not 2.
  unsigned ImmIdx = 2;
  unsigned UnscaledOp = 0;
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
  // Vector spills/fills can't take an immediate offset.
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
    return AArch64FrameOffsetCannotUpdate;
  case AArch64::PRFMui:
    Scale = 8;
    UnscaledOp = AArch64::PRFUMi;
    break;
  case AArch64::LDRXui:
    Scale = 8;
    UnscaledOp = AArch64::LDURXi;
    break;
  case AArch64::LDRWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURWi;
    break;
  case AArch64::LDRBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBi;
    break;
  case AArch64::LDRHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHi;
    break;
  case AArch64::LDRSui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSi;
    break;
  case AArch64::LDRDui:
    Scale = 8;
    UnscaledOp = AArch64::LDURDi;
    break;
  case AArch64::LDRQui:
    Scale = 16;
    UnscaledOp = AArch64::LDURQi;
    break;
  case AArch64::LDRBBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBBi;
    break;
  case AArch64::LDRHHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHHi;
    break;
  case AArch64::LDRSBXui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBXi;
    break;
  case AArch64::LDRSBWui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBWi;
    break;
  case AArch64::LDRSHXui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHXi;
    break;
  case AArch64::LDRSHWui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHWi;
    break;
  case AArch64::LDRSWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSWi;
    break;

  case AArch64::STRXui:
    Scale = 8;
    UnscaledOp = AArch64::STURXi;
    break;
  case AArch64::STRWui:
    Scale = 4;
    UnscaledOp = AArch64::STURWi;
    break;
  case AArch64::STRBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBi;
    break;
  case AArch64::STRHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHi;
    break;
  case AArch64::STRSui:
    Scale = 4;
    UnscaledOp = AArch64::STURSi;
    break;
  case AArch64::STRDui:
    Scale = 8;
    UnscaledOp = AArch64::STURDi;
    break;
  case AArch64::STRQui:
    Scale = 16;
    UnscaledOp = AArch64::STURQi;
    break;
  case AArch64::STRBBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBBi;
    break;
  case AArch64::STRHHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHHi;
    break;

  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 8;
    break;
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::LDNPQi:
  case AArch64::STNPQi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 16;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 4;
    break;

  case AArch64::LDURXi:
  case AArch64::LDURWi:
  case AArch64::LDURBi:
  case AArch64::LDURHi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSWi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::STURBi:
  case AArch64::STURHi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
    Scale = 1;
    break;
  }

  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset (and have an unscaled op to use).
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Use an unscaled addressing mode if the instruction has a negative offset
  // (or if the instruction is already using an unscaled addressing mode).
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }

  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}

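// Rewrite the frame-index operand of MI to FrameReg plus an immediate,
// folding as much of Offset as the encoding allows. Returns true if the
// offset was folded completely; any remainder is left in Offset.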
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}

void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::createImm(0));
}

// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }

// True when Opc sets the NZCV flags.
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    return (Options.UnsafeFPMath ||
            Options.AllowFPOpFusion == FPOpFusion::Fast);
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

//
// Utility routine that checks if \p MO is defined by a \p CombineOpc
// instruction in the basic block \p MBB, and, when \p CheckZeroReg is set,
// that the instruction's third register operand is \p ZeroReg.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // It must only be used by the user we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \p MO defined by an integer multiply, and can it be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \p MO defined by a floating-point multiply, and can it be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  case AArch64::FADDDrr:
  case AArch64::FADDSrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FMULDrr:
  case AArch64::FMULSrr:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv2f64:
  case AArch64::FMULXv4f32:
  case AArch64::FMULv2f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULv4f32:
    // Reassociating FP operations is only legal under unsafe-fp-math.
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}

/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // When NZCV is live, bail out.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertToNonFlagSettingOpc(Root);
    // When the opcode can't be changed, bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}

/// Floating-Point Support

/// Find instructions that can be turned into madd.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
      Found = true;
    }
    break;

  case AArch64::FSUBSrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}

/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool AArch64InstrInfo::isThroughputPattern(
    MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FNMULSUBS_OP1:
  case MachineCombinerPattern::FNMULSUBD_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Patterns vector. Patterns should be sorted in priority order since
/// the pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}

Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004014enum class FMAInstKind { Default, Indexed, Accumulator };
4015/// genFusedMultiply - Generate fused multiply instructions.
4016/// This function supports both integer and floating point instructions.
4017/// A typical example:
4018/// F|MUL I=A,B,0
4019/// F|ADD R,I,C
4020/// ==> F|MADD R,A,B,C
Joel Jones7466ccf2017-07-10 22:11:50 +00004021/// \param MF Containing MachineFunction
4022/// \param MRI Register information
4023/// \param TII Target information
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004024/// \param Root is the F|ADD instruction
NAKAMURA Takumi40da2672014-08-08 02:04:18 +00004025/// \param [out] InsInstrs is a vector of machine instructions and will
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004026/// contain the generated madd instruction
4027/// \param IdxMulOpd is index of operand in Root that is the result of
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004028/// the F|MUL. In the example above IdxMulOpd is 1.
4029/// \param MaddOpc the opcode of the f|madd instruction
Joel Jones7466ccf2017-07-10 22:11:50 +00004030/// \param RC Register class of operands
4031/// \param kind Kind of FMA instruction (addressing mode) to be generated
Florian Hahn5d6a4e42017-12-06 22:48:36 +00004032/// \param ReplacedAddend is the result register from the instruction
4033/// replacing the non-combined operand, if any.
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004034static MachineInstr *
4035genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
4036 const TargetInstrInfo *TII, MachineInstr &Root,
4037 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
4038 unsigned MaddOpc, const TargetRegisterClass *RC,
Florian Hahn5d6a4e42017-12-06 22:48:36 +00004039 FMAInstKind kind = FMAInstKind::Default,
4040 const unsigned *ReplacedAddend = nullptr) {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004041 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4042
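  // The multiply feeds exactly one of Root's two source operands; whichever
  // operand is not the MUL result supplies the addend.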
4043 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
4044 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004045 unsigned ResultReg = Root.getOperand(0).getReg();
4046 unsigned SrcReg0 = MUL->getOperand(1).getReg();
4047 bool Src0IsKill = MUL->getOperand(1).isKill();
4048 unsigned SrcReg1 = MUL->getOperand(2).getReg();
4049 bool Src1IsKill = MUL->getOperand(2).isKill();
Florian Hahn5d6a4e42017-12-06 22:48:36 +00004050
4051 unsigned SrcReg2;
4052 bool Src2IsKill;
4053 if (ReplacedAddend) {
4054 // If we just generated a new addend, we must be its only use.
4055 SrcReg2 = *ReplacedAddend;
4056 Src2IsKill = true;
4057 } else {
4058 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4059 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4060 }
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004061
4062 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4063 MRI.constrainRegClass(ResultReg, RC);
4064 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4065 MRI.constrainRegClass(SrcReg0, RC);
4066 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4067 MRI.constrainRegClass(SrcReg1, RC);
4068 if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
4069 MRI.constrainRegClass(SrcReg2, RC);
4070
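  // Note the operand order: the scalar FMADD-style opcodes take
  // (Rn, Rm, Ra), while the vector FMLA/FMLS forms (Indexed and Accumulator)
  // take the accumulator as the first source operand.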
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004071 MachineInstrBuilder MIB;
4072 if (kind == FMAInstKind::Default)
4073 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4074 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4075 .addReg(SrcReg1, getKillRegState(Src1IsKill))
4076 .addReg(SrcReg2, getKillRegState(Src2IsKill));
4077 else if (kind == FMAInstKind::Indexed)
4078 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4079 .addReg(SrcReg2, getKillRegState(Src2IsKill))
4080 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4081 .addReg(SrcReg1, getKillRegState(Src1IsKill))
4082 .addImm(MUL->getOperand(3).getImm());
4083 else if (kind == FMAInstKind::Accumulator)
4084 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4085 .addReg(SrcReg2, getKillRegState(Src2IsKill))
4086 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4087 .addReg(SrcReg1, getKillRegState(Src1IsKill));
4088 else
4089 llvm_unreachable("Invalid FMA instruction kind.");
4090 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004091 InsInstrs.push_back(MIB);
4092 return MUL;
4093}
4094
4095/// genMaddR - Generate madd instruction and combine mul and add using
4096/// an extra virtual register
4097/// Example - an ADD intermediate needs to be stored in a register:
4098/// MUL I=A,B,0
4099/// ADD R,I,Imm
4100/// ==> ORR V, ZR, Imm
4101/// ==> MADD R,A,B,V
Joel Jones7466ccf2017-07-10 22:11:50 +00004102/// \param MF Containing MachineFunction
4103/// \param MRI Register information
4104/// \param TII Target information
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004105/// \param Root is the ADD instruction
NAKAMURA Takumi40da2672014-08-08 02:04:18 +00004106/// \param [out] InsInstrs is a vector of machine instructions and will
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004107/// contain the generated madd instruction
4108/// \param IdxMulOpd is index of operand in Root that is the result of
4109/// the MUL. In the example above IdxMulOpd is 1.
4110/// \param MaddOpc the opcode of the madd instruction
4111/// \param VR is a virtual register that holds the value of an ADD operand
4112/// (V in the example above).
Joel Jones7466ccf2017-07-10 22:11:50 +00004113/// \param RC Register class of operands
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004114static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4115 const TargetInstrInfo *TII, MachineInstr &Root,
4116 SmallVectorImpl<MachineInstr *> &InsInstrs,
Jessica Paquette809d7082017-07-28 03:21:58 +00004117 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4118 const TargetRegisterClass *RC) {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004119 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4120
4121 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004122 unsigned ResultReg = Root.getOperand(0).getReg();
4123 unsigned SrcReg0 = MUL->getOperand(1).getReg();
4124 bool Src0IsKill = MUL->getOperand(1).isKill();
4125 unsigned SrcReg1 = MUL->getOperand(2).getReg();
4126 bool Src1IsKill = MUL->getOperand(2).isKill();
4127
4128 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4129 MRI.constrainRegClass(ResultReg, RC);
4130 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4131 MRI.constrainRegClass(SrcReg0, RC);
4132 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4133 MRI.constrainRegClass(SrcReg1, RC);
4134 if (TargetRegisterInfo::isVirtualRegister(VR))
4135 MRI.constrainRegClass(VR, RC);
4136
Jessica Paquette809d7082017-07-28 03:21:58 +00004137 MachineInstrBuilder MIB =
4138 BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4139 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4140 .addReg(SrcReg1, getKillRegState(Src1IsKill))
4141 .addReg(VR);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004142 // Insert the MADD
4143 InsInstrs.push_back(MIB);
4144 return MUL;
4145}
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004146
Sanjay Patelcfe03932015-06-19 23:21:42 +00004147/// When getMachineCombinerPatterns() finds potential patterns,
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004148/// this function generates the instructions that could replace the
4149/// original code sequence
4150void AArch64InstrInfo::genAlternativeCodeSequence(
Sanjay Patel387e66e2015-11-05 19:34:57 +00004151 MachineInstr &Root, MachineCombinerPattern Pattern,
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004152 SmallVectorImpl<MachineInstr *> &InsInstrs,
4153 SmallVectorImpl<MachineInstr *> &DelInstrs,
4154 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4155 MachineBasicBlock &MBB = *Root.getParent();
4156 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4157 MachineFunction &MF = *MBB.getParent();
Eric Christophere0818912014-09-03 20:36:26 +00004158 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004159
4160 MachineInstr *MUL;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004161 const TargetRegisterClass *RC;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004162 unsigned Opc;
4163 switch (Pattern) {
4164 default:
Haicheng Wu08b94622016-01-07 04:01:02 +00004165 // Reassociate instructions.
4166 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4167 DelInstrs, InstrIdxForVirtReg);
4168 return;
Sanjay Patel387e66e2015-11-05 19:34:57 +00004169 case MachineCombinerPattern::MULADDW_OP1:
4170 case MachineCombinerPattern::MULADDX_OP1:
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004171 // MUL I=A,B,0
4172 // ADD R,I,C
4173 // ==> MADD R,A,B,C
4174 // --- Create(MADD);
Sanjay Patel387e66e2015-11-05 19:34:57 +00004175 if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004176 Opc = AArch64::MADDWrrr;
4177 RC = &AArch64::GPR32RegClass;
4178 } else {
4179 Opc = AArch64::MADDXrrr;
4180 RC = &AArch64::GPR64RegClass;
4181 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004182 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004183 break;
Sanjay Patel387e66e2015-11-05 19:34:57 +00004184 case MachineCombinerPattern::MULADDW_OP2:
4185 case MachineCombinerPattern::MULADDX_OP2:
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004186 // MUL I=A,B,0
4187 // ADD R,C,I
4188 // ==> MADD R,A,B,C
4189 // --- Create(MADD);
Sanjay Patel387e66e2015-11-05 19:34:57 +00004190 if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004191 Opc = AArch64::MADDWrrr;
4192 RC = &AArch64::GPR32RegClass;
4193 } else {
4194 Opc = AArch64::MADDXrrr;
4195 RC = &AArch64::GPR64RegClass;
4196 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004197 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004198 break;
Sanjay Patel387e66e2015-11-05 19:34:57 +00004199 case MachineCombinerPattern::MULADDWI_OP1:
4200 case MachineCombinerPattern::MULADDXI_OP1: {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004201 // MUL I=A,B,0
4202 // ADD R,I,Imm
4203 // ==> ORR V, ZR, Imm
4204 // ==> MADD R,A,B,V
4205 // --- Create(MADD);
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004206 const TargetRegisterClass *OrrRC;
4207 unsigned BitSize, OrrOpc, ZeroReg;
Sanjay Patel387e66e2015-11-05 19:34:57 +00004208 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004209 OrrOpc = AArch64::ORRWri;
4210 OrrRC = &AArch64::GPR32spRegClass;
4211 BitSize = 32;
4212 ZeroReg = AArch64::WZR;
4213 Opc = AArch64::MADDWrrr;
4214 RC = &AArch64::GPR32RegClass;
4215 } else {
4216 OrrOpc = AArch64::ORRXri;
4217 OrrRC = &AArch64::GPR64spRegClass;
4218 BitSize = 64;
4219 ZeroReg = AArch64::XZR;
4220 Opc = AArch64::MADDXrrr;
4221 RC = &AArch64::GPR64RegClass;
4222 }
4223 unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4224 uint64_t Imm = Root.getOperand(2).getImm();
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004225
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004226 if (Root.getOperand(3).isImm()) {
4227 unsigned Val = Root.getOperand(3).getImm();
4228 Imm = Imm << Val;
4229 }
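  // E.g. for "add w0, w9, #1, lsl #12" the folded immediate is 1 << 12 =
  // 4096, which is a valid logical immediate that ORR can materialize.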
David Majnemer1182dd82016-07-21 23:46:56 +00004230 uint64_t UImm = SignExtend64(Imm, BitSize);
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004231 uint64_t Encoding;
4232 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4233 MachineInstrBuilder MIB1 =
4234 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4235 .addReg(ZeroReg)
4236 .addImm(Encoding);
4237 InsInstrs.push_back(MIB1);
4238 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4239 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004240 }
4241 break;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004242 }
Sanjay Patel387e66e2015-11-05 19:34:57 +00004243 case MachineCombinerPattern::MULSUBW_OP1:
4244 case MachineCombinerPattern::MULSUBX_OP1: {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004245 // MUL I=A,B,0
4246 // SUB R,I, C
4247 // ==> SUB V, 0, C
4248 // ==> MADD R,A,B,V // = -C + A*B
4249 // --- Create(MADD);
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004250 const TargetRegisterClass *SubRC;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004251 unsigned SubOpc, ZeroReg;
Sanjay Patel387e66e2015-11-05 19:34:57 +00004252 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004253 SubOpc = AArch64::SUBWrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004254 SubRC = &AArch64::GPR32spRegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004255 ZeroReg = AArch64::WZR;
4256 Opc = AArch64::MADDWrrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004257 RC = &AArch64::GPR32RegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004258 } else {
4259 SubOpc = AArch64::SUBXrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004260 SubRC = &AArch64::GPR64spRegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004261 ZeroReg = AArch64::XZR;
4262 Opc = AArch64::MADDXrrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004263 RC = &AArch64::GPR64RegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004264 }
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004265 unsigned NewVR = MRI.createVirtualRegister(SubRC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004266 // SUB NewVR, 0, C
4267 MachineInstrBuilder MIB1 =
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004268 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004269 .addReg(ZeroReg)
Diana Picus116bbab2017-01-13 09:58:52 +00004270 .add(Root.getOperand(2));
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004271 InsInstrs.push_back(MIB1);
4272 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004273 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4274 break;
4275 }
Sanjay Patel387e66e2015-11-05 19:34:57 +00004276 case MachineCombinerPattern::MULSUBW_OP2:
4277 case MachineCombinerPattern::MULSUBX_OP2:
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004278 // MUL I=A,B,0
4279 // SUB R,C,I
4280 // ==> MSUB R,A,B,C (computes C - A*B)
4281 // --- Create(MSUB);
Sanjay Patel387e66e2015-11-05 19:34:57 +00004282 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004283 Opc = AArch64::MSUBWrrr;
4284 RC = &AArch64::GPR32RegClass;
4285 } else {
4286 Opc = AArch64::MSUBXrrr;
4287 RC = &AArch64::GPR64RegClass;
4288 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004289 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004290 break;
Sanjay Patel387e66e2015-11-05 19:34:57 +00004291 case MachineCombinerPattern::MULSUBWI_OP1:
4292 case MachineCombinerPattern::MULSUBXI_OP1: {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004293 // MUL I=A,B,0
4294 // SUB R,I, Imm
4295 // ==> ORR V, ZR, -Imm
4296 // ==> MADD R,A,B,V // = -Imm + A*B
4297 // --- Create(MADD);
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004298 const TargetRegisterClass *OrrRC;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004299 unsigned BitSize, OrrOpc, ZeroReg;
Sanjay Patel387e66e2015-11-05 19:34:57 +00004300 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
Juergen Ributzka25816b02014-08-30 06:16:26 +00004301 OrrOpc = AArch64::ORRWri;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004302 OrrRC = &AArch64::GPR32spRegClass;
4303 BitSize = 32;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004304 ZeroReg = AArch64::WZR;
4305 Opc = AArch64::MADDWrrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004306 RC = &AArch64::GPR32RegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004307 } else {
4308 OrrOpc = AArch64::ORRXri;
Juergen Ributzkaf9660f02014-11-04 22:20:07 +00004309 OrrRC = &AArch64::GPR64spRegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004310 BitSize = 64;
4311 ZeroReg = AArch64::XZR;
4312 Opc = AArch64::MADDXrrr;
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004313 RC = &AArch64::GPR64RegClass;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004314 }
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004315 unsigned NewVR = MRI.createVirtualRegister(OrrRC);
David Majnemer1182dd82016-07-21 23:46:56 +00004316 uint64_t Imm = Root.getOperand(2).getImm();
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004317 if (Root.getOperand(3).isImm()) {
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004318 unsigned Val = Root.getOperand(3).getImm();
4319 Imm = Imm << Val;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004320 }
David Majnemer1182dd82016-07-21 23:46:56 +00004321 uint64_t UImm = SignExtend64(-Imm, BitSize);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004322 uint64_t Encoding;
4323 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4324 MachineInstrBuilder MIB1 =
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004325 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004326 .addReg(ZeroReg)
4327 .addImm(Encoding);
4328 InsInstrs.push_back(MIB1);
4329 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004330 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004331 }
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004332 break;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004333 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004334 // Floating Point Support
4335 case MachineCombinerPattern::FMULADDS_OP1:
4336 case MachineCombinerPattern::FMULADDD_OP1:
4337 // FMUL I=A,B,0
4338 // FADD R,I,C
4339 // ==> FMADD R,A,B,C
4340 // --- Create(FMADD);
4341 if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4342 Opc = AArch64::FMADDSrrr;
4343 RC = &AArch64::FPR32RegClass;
4344 } else {
4345 Opc = AArch64::FMADDDrrr;
4346 RC = &AArch64::FPR64RegClass;
4347 }
4348 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4349 break;
4350 case MachineCombinerPattern::FMULADDS_OP2:
4351 case MachineCombinerPattern::FMULADDD_OP2:
4352 // FMUL I=A,B,0
4353 // FADD R,C,I
4354 // ==> FMADD R,A,B,C
4355 // --- Create(FMADD);
4356 if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4357 Opc = AArch64::FMADDSrrr;
4358 RC = &AArch64::FPR32RegClass;
4359 } else {
4360 Opc = AArch64::FMADDDrrr;
4361 RC = &AArch64::FPR64RegClass;
4362 }
4363 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4364 break;
4365
4366 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4367 Opc = AArch64::FMLAv1i32_indexed;
4368 RC = &AArch64::FPR32RegClass;
4369 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4370 FMAInstKind::Indexed);
4371 break;
4372 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4373 Opc = AArch64::FMLAv1i32_indexed;
4374 RC = &AArch64::FPR32RegClass;
4375 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4376 FMAInstKind::Indexed);
4377 break;
4378
4379 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4380 Opc = AArch64::FMLAv1i64_indexed;
4381 RC = &AArch64::FPR64RegClass;
4382 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4383 FMAInstKind::Indexed);
4384 break;
4385 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4386 Opc = AArch64::FMLAv1i64_indexed;
4387 RC = &AArch64::FPR64RegClass;
4388 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4389 FMAInstKind::Indexed);
4390 break;
4391
4392 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4393 case MachineCombinerPattern::FMLAv2f32_OP1:
4394 RC = &AArch64::FPR64RegClass;
4395 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4396 Opc = AArch64::FMLAv2i32_indexed;
4397 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4398 FMAInstKind::Indexed);
4399 } else {
4400 Opc = AArch64::FMLAv2f32;
4401 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4402 FMAInstKind::Accumulator);
4403 }
4404 break;
4405 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4406 case MachineCombinerPattern::FMLAv2f32_OP2:
4407 RC = &AArch64::FPR64RegClass;
4408 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4409 Opc = AArch64::FMLAv2i32_indexed;
4410 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4411 FMAInstKind::Indexed);
4412 } else {
4413 Opc = AArch64::FMLAv2f32;
4414 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4415 FMAInstKind::Accumulator);
4416 }
4417 break;
4418
4419 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4420 case MachineCombinerPattern::FMLAv2f64_OP1:
4421 RC = &AArch64::FPR128RegClass;
4422 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4423 Opc = AArch64::FMLAv2i64_indexed;
4424 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4425 FMAInstKind::Indexed);
4426 } else {
4427 Opc = AArch64::FMLAv2f64;
4428 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4429 FMAInstKind::Accumulator);
4430 }
4431 break;
4432 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4433 case MachineCombinerPattern::FMLAv2f64_OP2:
4434 RC = &AArch64::FPR128RegClass;
4435 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4436 Opc = AArch64::FMLAv2i64_indexed;
4437 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4438 FMAInstKind::Indexed);
4439 } else {
4440 Opc = AArch64::FMLAv2f64;
4441 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4442 FMAInstKind::Accumulator);
4443 }
4444 break;
4445
4446 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4447 case MachineCombinerPattern::FMLAv4f32_OP1:
4448 RC = &AArch64::FPR128RegClass;
4449 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
4450 Opc = AArch64::FMLAv4i32_indexed;
4451 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4452 FMAInstKind::Indexed);
4453 } else {
4454 Opc = AArch64::FMLAv4f32;
4455 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4456 FMAInstKind::Accumulator);
4457 }
4458 break;
4459
4460 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4461 case MachineCombinerPattern::FMLAv4f32_OP2:
4462 RC = &AArch64::FPR128RegClass;
4463 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
4464 Opc = AArch64::FMLAv4i32_indexed;
4465 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4466 FMAInstKind::Indexed);
4467 } else {
4468 Opc = AArch64::FMLAv4f32;
4469 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4470 FMAInstKind::Accumulator);
4471 }
4472 break;
4473
4474 case MachineCombinerPattern::FMULSUBS_OP1:
4475 case MachineCombinerPattern::FMULSUBD_OP1: {
4476 // FMUL I=A,B,0
4477 // FSUB R,I,C
4478 // ==> FNMSUB R,A,B,C // = -C + A*B
4479 // --- Create(FNMSUB);
4480 if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4481 Opc = AArch64::FNMSUBSrrr;
4482 RC = &AArch64::FPR32RegClass;
4483 } else {
4484 Opc = AArch64::FNMSUBDrrr;
4485 RC = &AArch64::FPR64RegClass;
4486 }
4487 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4488 break;
4489 }
Chad Rosieraeffffd2017-05-11 20:07:24 +00004490
4491 case MachineCombinerPattern::FNMULSUBS_OP1:
4492 case MachineCombinerPattern::FNMULSUBD_OP1: {
4493 // FNMUL I=A,B,0
4494 // FSUB R,I,C
4495 // ==> FNMADD R,A,B,C // = -A*B - C
4496 // --- Create(FNMADD);
4497 if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4498 Opc = AArch64::FNMADDSrrr;
4499 RC = &AArch64::FPR32RegClass;
4500 } else {
4501 Opc = AArch64::FNMADDDrrr;
4502 RC = &AArch64::FPR64RegClass;
4503 }
4504 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4505 break;
4506 }
4507
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004508 case MachineCombinerPattern::FMULSUBS_OP2:
4509 case MachineCombinerPattern::FMULSUBD_OP2: {
4510 // FMUL I=A,B,0
4511 // FSUB R,C,I
4512 // ==> FMSUB R,A,B,C (computes C - A*B)
4513 // --- Create(FMSUB);
4514 if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4515 Opc = AArch64::FMSUBSrrr;
4516 RC = &AArch64::FPR32RegClass;
4517 } else {
4518 Opc = AArch64::FMSUBDrrr;
4519 RC = &AArch64::FPR64RegClass;
4520 }
4521 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4522 break;
Chad Rosier8b12a032017-05-16 12:43:23 +00004523 }
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00004524
4525 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4526 Opc = AArch64::FMLSv1i32_indexed;
4527 RC = &AArch64::FPR32RegClass;
4528 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4529 FMAInstKind::Indexed);
4530 break;
4531
4532 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4533 Opc = AArch64::FMLSv1i64_indexed;
4534 RC = &AArch64::FPR64RegClass;
4535 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4536 FMAInstKind::Indexed);
4537 break;
4538
4539 case MachineCombinerPattern::FMLSv2f32_OP2:
4540 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4541 RC = &AArch64::FPR64RegClass;
4542 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
4543 Opc = AArch64::FMLSv2i32_indexed;
4544 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4545 FMAInstKind::Indexed);
4546 } else {
4547 Opc = AArch64::FMLSv2f32;
4548 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4549 FMAInstKind::Accumulator);
4550 }
4551 break;
4552
4553 case MachineCombinerPattern::FMLSv2f64_OP2:
4554 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4555 RC = &AArch64::FPR128RegClass;
4556 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
4557 Opc = AArch64::FMLSv2i64_indexed;
4558 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4559 FMAInstKind::Indexed);
4560 } else {
4561 Opc = AArch64::FMLSv2f64;
4562 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4563 FMAInstKind::Accumulator);
4564 }
4565 break;
4566
4567 case MachineCombinerPattern::FMLSv4f32_OP2:
4568 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4569 RC = &AArch64::FPR128RegClass;
4570 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
4571 Opc = AArch64::FMLSv4i32_indexed;
4572 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4573 FMAInstKind::Indexed);
4574 } else {
4575 Opc = AArch64::FMLSv4f32;
4576 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4577 FMAInstKind::Accumulator);
4578 }
4579 break;
Florian Hahn5d6a4e42017-12-06 22:48:36 +00004580 case MachineCombinerPattern::FMLSv2f32_OP1:
4581 case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
4582 RC = &AArch64::FPR64RegClass;
4583 unsigned NewVR = MRI.createVirtualRegister(RC);
4584 MachineInstrBuilder MIB1 =
4585 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4586 .add(Root.getOperand(2));
4587 InsInstrs.push_back(MIB1);
4588 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4589 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
4590 Opc = AArch64::FMLAv2i32_indexed;
4591 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4592 FMAInstKind::Indexed, &NewVR);
4593 } else {
4594 Opc = AArch64::FMLAv2f32;
4595 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4596 FMAInstKind::Accumulator, &NewVR);
4597 }
4598 break;
4599 }
4600 case MachineCombinerPattern::FMLSv4f32_OP1:
4601 case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
4602 RC = &AArch64::FPR128RegClass;
4603 unsigned NewVR = MRI.createVirtualRegister(RC);
4604 MachineInstrBuilder MIB1 =
4605 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4606 .add(Root.getOperand(2));
4607 InsInstrs.push_back(MIB1);
4608 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4609 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
4610 Opc = AArch64::FMLAv4i32_indexed;
4611 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4612 FMAInstKind::Indexed, &NewVR);
4613 } else {
4614 Opc = AArch64::FMLAv4f32;
4615 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4616 FMAInstKind::Accumulator, &NewVR);
4617 }
4618 break;
4619 }
4620 case MachineCombinerPattern::FMLSv2f64_OP1:
4621 case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
4622 RC = &AArch64::FPR128RegClass;
4623 unsigned NewVR = MRI.createVirtualRegister(RC);
4624 MachineInstrBuilder MIB1 =
4625 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4626 .add(Root.getOperand(2));
4627 InsInstrs.push_back(MIB1);
4628 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4629 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
4630 Opc = AArch64::FMLAv2i64_indexed;
4631 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4632 FMAInstKind::Indexed, &NewVR);
4633 } else {
4634 Opc = AArch64::FMLAv2f64;
4635 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4636 FMAInstKind::Accumulator, &NewVR);
4637 }
4638 break;
4639 }
Juergen Ributzka31e5b7f2014-09-03 07:07:10 +00004640 } // end switch (Pattern)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004641 // Record MUL and ADD/SUB for deletion
4642 DelInstrs.push_back(MUL);
4643 DelInstrs.push_back(&Root);
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00004644}
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004645
Adrian Prantl5f8f34e42018-05-01 15:54:18 +00004646/// Replace a csinc-branch sequence with a simple conditional branch
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004647///
4648/// Examples:
Joel Jonesaff09bf2017-07-06 14:17:36 +00004649/// 1. \code
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004650/// csinc w9, wzr, wzr, <condition code>
4651/// tbnz w9, #0, 0x44
Joel Jonesaff09bf2017-07-06 14:17:36 +00004652/// \endcode
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004653/// to
Joel Jonesaff09bf2017-07-06 14:17:36 +00004654/// \code
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004655/// b.<inverted condition code>
Joel Jonesaff09bf2017-07-06 14:17:36 +00004656/// \endcode
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004657///
Joel Jonesaff09bf2017-07-06 14:17:36 +00004658/// 2. \code
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004659/// csinc w9, wzr, wzr, <condition code>
4660/// tbz w9, #0, 0x44
Joel Jonesaff09bf2017-07-06 14:17:36 +00004661/// \endcode
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004662/// to
Joel Jonesaff09bf2017-07-06 14:17:36 +00004663/// \code
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004664/// b.<condition code>
Joel Jonesaff09bf2017-07-06 14:17:36 +00004665/// \endcode
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004666///
Chad Rosier4aeab5f2016-03-21 13:43:58 +00004667/// Replace a compare-and-branch sequence with a TBZ/TBNZ instruction when the
4668/// compare's constant operand is a power of 2.
Balaram Makame9b27252016-03-10 17:54:55 +00004669///
4670/// Examples:
Joel Jonesaff09bf2017-07-06 14:17:36 +00004671/// \code
Balaram Makame9b27252016-03-10 17:54:55 +00004672/// and w8, w8, #0x400
4673/// cbnz w8, L1
Joel Jonesaff09bf2017-07-06 14:17:36 +00004674/// \endcode
Balaram Makame9b27252016-03-10 17:54:55 +00004675/// to
Joel Jonesaff09bf2017-07-06 14:17:36 +00004676/// \code
Balaram Makame9b27252016-03-10 17:54:55 +00004677/// tbnz w8, #10, L1
Joel Jonesaff09bf2017-07-06 14:17:36 +00004678/// \endcode
Balaram Makame9b27252016-03-10 17:54:55 +00004679///
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004680/// \param MI Conditional Branch
4681/// \return True when the simple conditional branch is generated
4682///
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004683bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004684 bool IsNegativeBranch = false;
4685 bool IsTestAndBranch = false;
4686 unsigned TargetBBInMI = 0;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004687 switch (MI.getOpcode()) {
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004688 default:
4689 llvm_unreachable("Unknown branch instruction?");
4690 case AArch64::Bcc:
4691 return false;
4692 case AArch64::CBZW:
4693 case AArch64::CBZX:
4694 TargetBBInMI = 1;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004695 break;
4696 case AArch64::CBNZW:
4697 case AArch64::CBNZX:
4698 TargetBBInMI = 1;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004699 IsNegativeBranch = true;
4700 break;
4701 case AArch64::TBZW:
4702 case AArch64::TBZX:
4703 TargetBBInMI = 2;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004704 IsTestAndBranch = true;
4705 break;
4706 case AArch64::TBNZW:
4707 case AArch64::TBNZX:
4708 TargetBBInMI = 2;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004709 IsNegativeBranch = true;
4710 IsTestAndBranch = true;
4711 break;
4712 }
4713 // So we increment a zero register and test for bits other
4714 // than bit 0? Conservatively bail out in case the verifier
4715 // missed this case.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004716 if (IsTestAndBranch && MI.getOperand(1).getImm())
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004717 return false;
4718
4719 // Find Definition.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004720 assert(MI.getParent() && "Incomplete machine instruction");
4721 MachineBasicBlock *MBB = MI.getParent();
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004722 MachineFunction *MF = MBB->getParent();
4723 MachineRegisterInfo *MRI = &MF->getRegInfo();
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004724 unsigned VReg = MI.getOperand(0).getReg();
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004725 if (!TargetRegisterInfo::isVirtualRegister(VReg))
4726 return false;
4727
4728 MachineInstr *DefMI = MRI->getVRegDef(VReg);
4729
Balaram Makame9b27252016-03-10 17:54:55 +00004730 // Look through COPY instructions to find definition.
4731 while (DefMI->isCopy()) {
4732 unsigned CopyVReg = DefMI->getOperand(1).getReg();
4733 if (!MRI->hasOneNonDBGUse(CopyVReg))
4734 return false;
4735 if (!MRI->hasOneDef(CopyVReg))
4736 return false;
4737 DefMI = MRI->getVRegDef(CopyVReg);
4738 }
4739
4740 switch (DefMI->getOpcode()) {
4741 default:
4742 return false;
4743 // Fold AND into a TBZ/TBNZ if the constant operand is a power of 2.
4744 case AArch64::ANDWri:
4745 case AArch64::ANDXri: {
4746 if (IsTestAndBranch)
4747 return false;
4748 if (DefMI->getParent() != MBB)
4749 return false;
4750 if (!MRI->hasOneNonDBGUse(VReg))
4751 return false;
4752
Quentin Colombetabe2d012016-04-25 20:54:08 +00004753 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
Balaram Makame9b27252016-03-10 17:54:55 +00004754 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
Quentin Colombetabe2d012016-04-25 20:54:08 +00004755 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
Balaram Makame9b27252016-03-10 17:54:55 +00004756 if (!isPowerOf2_64(Mask))
4757 return false;
4758
4759 MachineOperand &MO = DefMI->getOperand(1);
4760 unsigned NewReg = MO.getReg();
4761 if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4762 return false;
4763
4764 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4765
4766 MachineBasicBlock &RefToMBB = *MBB;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004767 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4768 DebugLoc DL = MI.getDebugLoc();
Balaram Makame9b27252016-03-10 17:54:55 +00004769 unsigned Imm = Log2_64(Mask);
Renato Golin179d1f52016-04-23 19:30:52 +00004770 unsigned Opc = (Imm < 32)
4771 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4772 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
Quentin Colombetabe2d012016-04-25 20:54:08 +00004773 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4774 .addReg(NewReg)
4775 .addImm(Imm)
4776 .addMBB(TBB);
Matthias Braune25bbd02016-05-03 04:54:16 +00004777 // Register lives on to the new TBZ/TBNZ now.
4778 MO.setIsKill(false);
Quentin Colombetabe2d012016-04-25 20:54:08 +00004779
4780 // For immediate smaller than 32, we need to use the 32-bit
4781 // variant (W) in all cases. Indeed the 64-bit variant does not
4782 // allow to encode them.
4783 // Therefore, if the input register is 64-bit, we need to take the
4784 // 32-bit sub-part.
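    // E.g. a test of bit 10 of a 64-bit register is emitted as a TBZW/TBNZW
    // on the register's 32-bit sub-register.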
4785 if (!Is32Bit && Imm < 32)
4786 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004787 MI.eraseFromParent();
Balaram Makame9b27252016-03-10 17:54:55 +00004788 return true;
4789 }
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004790 // Look for CSINC
Balaram Makame9b27252016-03-10 17:54:55 +00004791 case AArch64::CSINCWr:
4792 case AArch64::CSINCXr: {
4793 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4794 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4795 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4796 DefMI->getOperand(2).getReg() == AArch64::XZR))
4797 return false;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004798
Balaram Makame9b27252016-03-10 17:54:55 +00004799 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4800 return false;
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004801
Balaram Makame9b27252016-03-10 17:54:55 +00004802 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
Balaram Makame9b27252016-03-10 17:54:55 +00004803 // Convert only when the condition code is not modified between
4804 // the CSINC and the branch. The CC may be used by other
4805 // instructions in between.
Evgeny Astigeevich9c24ebf2016-04-06 11:39:00 +00004806 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
Balaram Makame9b27252016-03-10 17:54:55 +00004807 return false;
4808 MachineBasicBlock &RefToMBB = *MBB;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004809 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4810 DebugLoc DL = MI.getDebugLoc();
Balaram Makame9b27252016-03-10 17:54:55 +00004811 if (IsNegativeBranch)
4812 CC = AArch64CC::getInvertedCondCode(CC);
4813 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004814 MI.eraseFromParent();
Balaram Makame9b27252016-03-10 17:54:55 +00004815 return true;
4816 }
4817 }
Gerolf Hoflehnera4c96d02014-10-14 23:07:53 +00004818}
Alex Lorenzf3630112015-08-18 22:52:15 +00004819
4820std::pair<unsigned, unsigned>
4821AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
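  // E.g. a flag word of MO_PAGEOFF | MO_NC splits into the direct fragment
  // flag MO_PAGEOFF and the bitmask flag MO_NC.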
4822 const unsigned Mask = AArch64II::MO_FRAGMENT;
4823 return std::make_pair(TF & Mask, TF & ~Mask);
4824}
4825
4826ArrayRef<std::pair<unsigned, const char *>>
4827AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4828 using namespace AArch64II;
Eugene Zelenko049b0172017-01-06 00:30:53 +00004829
Hal Finkel982e8d42015-08-30 08:07:29 +00004830 static const std::pair<unsigned, const char *> TargetFlags[] = {
Jessica Paquette809d7082017-07-28 03:21:58 +00004831 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4832 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
4833 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
Alex Lorenzf3630112015-08-18 22:52:15 +00004834 {MO_HI12, "aarch64-hi12"}};
4835 return makeArrayRef(TargetFlags);
4836}
4837
4838ArrayRef<std::pair<unsigned, const char *>>
4839AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4840 using namespace AArch64II;
Eugene Zelenko049b0172017-01-06 00:30:53 +00004841
Hal Finkel982e8d42015-08-30 08:07:29 +00004842 static const std::pair<unsigned, const char *> TargetFlags[] = {
Jessica Paquette809d7082017-07-28 03:21:58 +00004843 {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
Alex Lorenzf3630112015-08-18 22:52:15 +00004844 return makeArrayRef(TargetFlags);
4845}
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004846
Geoff Berry6748abe2017-07-13 02:28:54 +00004847ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4848AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4849 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
Geoff Berryb1e87142017-07-14 21:44:12 +00004850 {{MOSuppressPair, "aarch64-suppress-pair"},
4851 {MOStridedAccess, "aarch64-strided-access"}};
Geoff Berry6748abe2017-07-13 02:28:54 +00004852 return makeArrayRef(TargetFlags);
4853}
4854
Jessica Paquette02c124d2017-12-18 19:33:21 +00004855/// Constants defining how certain sequences should be outlined.
4856/// This encompasses how an outlined function should be called, and what kind of
4857/// frame should be emitted for that outlined function.
4858///
4859/// \p MachineOutlinerDefault implies that the function should be called with
4860/// a save and restore of LR to the stack.
4861///
4862/// That is,
4863///
4864/// I1      Save LR                    OUTLINED_FUNCTION:
4865/// I2 -->  BL OUTLINED_FUNCTION       I1
4866/// I3      Restore LR                 I2
4867///                                    I3
4868///                                    RET
4869///
4870/// * Call construction overhead: 3 (save + BL + restore)
4871/// * Frame construction overhead: 1 (ret)
4872/// * Requires stack fixups? Yes
4873///
4874/// \p MachineOutlinerTailCall implies that the function is being created from
4875/// a sequence of instructions ending in a return.
4876///
4877/// That is,
4878///
4879/// I1                              OUTLINED_FUNCTION:
4880/// I2 -->  B OUTLINED_FUNCTION     I1
4881/// RET                             I2
4882///                                 RET
4883///
4884/// * Call construction overhead: 1 (B)
4885/// * Frame construction overhead: 0 (Return included in sequence)
4886/// * Requires stack fixups? No
4887///
4888/// \p MachineOutlinerNoLRSave implies that the function should be called using
4889/// a BL instruction, but doesn't require LR to be saved and restored. This
4890/// happens when LR is known to be dead.
4891///
4892/// That is,
4893///
4894/// I1                               OUTLINED_FUNCTION:
4895/// I2 -->  BL OUTLINED_FUNCTION     I1
4896/// I3                               I2
4897///                                  I3
4898///                                  RET
4899///
4900/// * Call construction overhead: 1 (BL)
4901/// * Frame construction overhead: 1 (RET)
4902/// * Requires stack fixups? No
4903///
Eli Friedman042dc9e2018-05-22 19:11:06 +00004904/// \p MachineOutlinerThunk implies that the function is being created from
4905/// a sequence of instructions ending in a call. The outlined function is
4906/// called with a BL instruction, and the outlined function tail-calls the
4907/// original call destination.
4908///
4909/// That is,
4910///
4911/// I1                               OUTLINED_FUNCTION:
4912/// I2 -->  BL OUTLINED_FUNCTION     I1
4913/// BL f                             I2
4914///                                  B f
4915/// * Call construction overhead: 1 (BL)
4916/// * Frame construction overhead: 0
4917/// * Requires stack fixups? No
4918///
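/// As a rough illustration of how these costs interact, the generic outliner
/// profits only when, approximately,
///   Occurrences * (SequenceSize - CallOverhead) > SequenceSize + FrameOverhead
/// so a tail-callable candidate (4-byte call, 0-byte frame) pays off much
/// sooner than one needing a full LR save/restore (12-byte call, 4-byte frame).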
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004919enum MachineOutlinerClass {
4920 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
4921 MachineOutlinerTailCall, /// Only emit a branch.
Eli Friedman042dc9e2018-05-22 19:11:06 +00004922 MachineOutlinerNoLRSave, /// Emit a call and return.
4923 MachineOutlinerThunk, /// Emit a call and tail-call.
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004924};
Jessica Paquetted87f5442017-07-29 02:55:46 +00004925
Jessica Paquette3291e732018-01-09 00:26:18 +00004926enum MachineOutlinerMBBFlags {
4927 LRUnavailableSomewhere = 0x2,
4928 HasCalls = 0x4
4929};
4930
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004931bool AArch64InstrInfo::canOutlineWithoutLRSave(
4932 MachineBasicBlock::iterator &CallInsertionPt) const {
4933 // Is LR live at the chosen call insertion point? If not, no save/restore of LR is needed.
4934 MachineBasicBlock &MBB = *(CallInsertionPt->getParent());
4935 LiveRegUnits LRU(getRegisterInfo());
4936 LRU.addLiveOuts(MBB);
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004937
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004938 // Get liveness information from the end of the block to the end of the
4939 // prospective outlined region.
4940 std::for_each(MBB.rbegin(),
Jessica Paquette02c124d2017-12-18 19:33:21 +00004941 (MachineBasicBlock::reverse_iterator)CallInsertionPt,
4942 [&LRU](MachineInstr &MI) { LRU.stepBackward(MI); });
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004943
4944 // If the link register is available at this point, then we can safely outline
4945 // the region without saving/restoring LR. Otherwise, we must emit a save and
4946 // restore.
4947 return LRU.available(AArch64::LR);
Jessica Paquette809d7082017-07-28 03:21:58 +00004948}
Jessica Paquetteea8cc092017-03-17 22:26:55 +00004949
Jessica Paquetteaa087322018-06-04 21:14:16 +00004950outliner::TargetCostInfo
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004951AArch64InstrInfo::getOutlininingCandidateInfo(
Jessica Paquetteaa087322018-06-04 21:14:16 +00004952 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
Eli Friedman4081a572018-05-18 01:52:16 +00004953 unsigned SequenceSize = std::accumulate(
Jessica Paquetteaa087322018-06-04 21:14:16 +00004954 RepeatedSequenceLocs[0].front(),
4955 std::next(RepeatedSequenceLocs[0].back()),
Eli Friedman4081a572018-05-18 01:52:16 +00004956 0, [this](unsigned Sum, const MachineInstr &MI) {
4957 return Sum + getInstSizeInBytes(MI);
4958 });
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004959 unsigned CallID = MachineOutlinerDefault;
4960 unsigned FrameID = MachineOutlinerDefault;
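  // Conservative defaults: a save/restore of LR around the call
  // (STR + BL + LDR, 12 bytes) and a RET in the outlined frame (4 bytes).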
Eli Friedman4081a572018-05-18 01:52:16 +00004961 unsigned NumBytesForCall = 12;
4962 unsigned NumBytesToCreateFrame = 4;
Jessica Paquette809d7082017-07-28 03:21:58 +00004963
Jessica Paquetteaa087322018-06-04 21:14:16 +00004964 auto DoesntNeedLRSave =
4965 [this](outliner::Candidate &I) {return canOutlineWithoutLRSave(I.back());};
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004966
Jessica Paquetteaa087322018-06-04 21:14:16 +00004967 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
Eli Friedman042dc9e2018-05-22 19:11:06 +00004968
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004969 // If the last instruction in any candidate is a terminator, then we should
4970 // tail call all of the candidates.
Jessica Paquetteaa087322018-06-04 21:14:16 +00004971 if (RepeatedSequenceLocs[0].back()->isTerminator()) {
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004972 CallID = MachineOutlinerTailCall;
4973 FrameID = MachineOutlinerTailCall;
Eli Friedman4081a572018-05-18 01:52:16 +00004974 NumBytesForCall = 4;
4975 NumBytesToCreateFrame = 0;
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004976 }
4977
Eli Friedman042dc9e2018-05-22 19:11:06 +00004978 else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
4979 // FIXME: Do we need to check if the code after this uses the value of LR?
4980 CallID = MachineOutlinerThunk;
4981 FrameID = MachineOutlinerThunk;
4982 NumBytesForCall = 4;
4983 NumBytesToCreateFrame = 0;
4984 }
4985
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004986 else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4987 DoesntNeedLRSave)) {
4988 CallID = MachineOutlinerNoLRSave;
4989 FrameID = MachineOutlinerNoLRSave;
Eli Friedman4081a572018-05-18 01:52:16 +00004990 NumBytesForCall = 4;
4991 NumBytesToCreateFrame = 4;
Jessica Paquette4cf187b2017-09-27 20:47:39 +00004992 }
4993
Jessica Paquette02c124d2017-12-18 19:33:21 +00004994 // Check if the range contains a call. These require a save + restore of the
4995 // link register.
Jessica Paquetteaa087322018-06-04 21:14:16 +00004996 if (std::any_of(RepeatedSequenceLocs[0].front(),
4997 RepeatedSequenceLocs[0].back(),
Jessica Paquette02c124d2017-12-18 19:33:21 +00004998 [](const MachineInstr &MI) { return MI.isCall(); }))
Eli Friedman4081a572018-05-18 01:52:16 +00004999 NumBytesToCreateFrame += 8; // Save + restore the link register.
Jessica Paquette02c124d2017-12-18 19:33:21 +00005000
5001 // Handle the last instruction separately. If this is a tail call, then the
5002 // last instruction is a call. We don't want to save + restore in this case.
5003 // However, it could be possible that the last instruction is a call without
5004 // it being valid to tail call this sequence. We should consider this as well.
Eli Friedman042dc9e2018-05-22 19:11:06 +00005005 else if (FrameID != MachineOutlinerThunk &&
5006 FrameID != MachineOutlinerTailCall &&
Jessica Paquetteaa087322018-06-04 21:14:16 +00005007 RepeatedSequenceLocs[0].back()->isCall())
Eli Friedman4081a572018-05-18 01:52:16 +00005008 NumBytesToCreateFrame += 8;
Jessica Paquette02c124d2017-12-18 19:33:21 +00005009
Jessica Paquetteaa087322018-06-04 21:14:16 +00005010 return outliner::TargetCostInfo(SequenceSize, NumBytesForCall,
Eli Friedman4081a572018-05-18 01:52:16 +00005011 NumBytesToCreateFrame, CallID, FrameID);
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005012}
5013
Jessica Paquette02c124d2017-12-18 19:33:21 +00005014bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5015 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
Matthias Braunf1caa282017-12-15 22:22:58 +00005016 const Function &F = MF.getFunction();
Jessica Paquette13593842017-10-07 00:16:34 +00005017
Jessica Paquette13593842017-10-07 00:16:34 +00005018 // Can F be deduplicated by the linker? If it can, don't outline from it.
Matthias Braunf1caa282017-12-15 22:22:58 +00005019 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
Jessica Paquette13593842017-10-07 00:16:34 +00005020 return false;
Matthias Braunf1caa282017-12-15 22:22:58 +00005021
Eli Friedmanda018e52018-04-27 00:21:34 +00005022 // Don't outline from functions with section markings; the program could
5023 // expect that all the code is in the named section.
5024 // FIXME: Allow outlining from multiple functions with the same section
5025 // marking.
5026 if (F.hasSection())
5027 return false;
5028
Jessica Paquette642f6c62018-04-03 21:56:10 +00005029 // Outlining from functions with redzones is unsafe since the outliner may
5030 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5031 // outline from it.
5032 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5033 if (!AFI || AFI->hasRedZone().getValueOr(true))
5034 return false;
5035
5036 // It's safe to outline from MF.
Jessica Paquette13593842017-10-07 00:16:34 +00005037 return true;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005038}
5039
Jessica Paquette3291e732018-01-09 00:26:18 +00005040unsigned
5041AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
5042 unsigned Flags = 0x0;
5043 // Check if there's a call inside this MachineBasicBlock. If there is, then
5044 // set a flag.
5045 if (std::any_of(MBB.begin(), MBB.end(),
5046 [](MachineInstr &MI) { return MI.isCall(); }))
5047 Flags |= MachineOutlinerMBBFlags::HasCalls;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005048
Jessica Paquette3291e732018-01-09 00:26:18 +00005049 // Check if LR is available through all of the MBB. If it's not, then set
5050 // a flag.
5051 LiveRegUnits LRU(getRegisterInfo());
5052 LRU.addLiveOuts(MBB);
5053
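  // Note: accumulate() (unlike stepBackward()) marks a unit live if any
  // instruction in the block reads or writes it, making this a conservative
  // "LR is touched somewhere in this MBB" test.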
5054 std::for_each(MBB.rbegin(),
5055 MBB.rend(),
5056 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
5057
5058 if (!LRU.available(AArch64::LR))
5059 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
5060
5061 return Flags;
5062}
5063
Jessica Paquetteaa087322018-06-04 21:14:16 +00005064outliner::InstrType
Jessica Paquette3291e732018-01-09 00:26:18 +00005065AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
5066 unsigned Flags) const {
5067 MachineInstr &MI = *MIT;
5068 MachineBasicBlock *MBB = MI.getParent();
5069 MachineFunction *MF = MBB->getParent();
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005070 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
5071
5072 // Don't outline LOHs.
5073 if (FuncInfo->getLOHRelated().count(&MI))
Jessica Paquetteaa087322018-06-04 21:14:16 +00005074 return outliner::InstrType::Illegal;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005075
5076 // Don't allow debug values to impact outlining type.
Shiva Chen801bf7e2018-05-09 02:42:00 +00005077 if (MI.isDebugInstr() || MI.isIndirectDebugValue())
Jessica Paquetteaa087322018-06-04 21:14:16 +00005078 return outliner::InstrType::Invisible;
Jessica Paquetteb3e7dc912018-03-16 22:53:34 +00005079
5080 // At this point, KILL instructions don't really tell us much so we can go
5081 // ahead and skip over them.
5082 if (MI.isKill())
Jessica Paquetteaa087322018-06-04 21:14:16 +00005083 return outliner::InstrType::Invisible;
Jessica Paquette3291e732018-01-09 00:26:18 +00005084
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005085 // Is this a terminator for a basic block?
5086 if (MI.isTerminator()) {
5087
5088 // Is this the end of a function?
5089 if (MI.getParent()->succ_empty())
Jessica Paquetteaa087322018-06-04 21:14:16 +00005090 return outliner::InstrType::Legal;
Jessica Paquette3291e732018-01-09 00:26:18 +00005091
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005092 // It's not, so don't outline it.
Jessica Paquetteaa087322018-06-04 21:14:16 +00005093 return outliner::InstrType::Illegal;
Jessica Paquetteea8cc092017-03-17 22:26:55 +00005094 }
5095
Jessica Paquette2519ee72018-03-27 22:23:48 +00005096 // Make sure none of the operands are un-outlinable.
5097 for (const MachineOperand &MOP : MI.operands()) {
5098 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
5099 MOP.isTargetIndex())
Jessica Paquetteaa087322018-06-04 21:14:16 +00005100 return outliner::InstrType::Illegal;
Jessica Paquette4f564282018-04-24 22:38:15 +00005101
5102 // If it uses LR or W30 explicitly, then don't touch it.
5103 if (MOP.isReg() && !MOP.isImplicit() &&
5104 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
Jessica Paquetteaa087322018-06-04 21:14:16 +00005105 return outliner::InstrType::Illegal;
Jessica Paquette2519ee72018-03-27 22:23:48 +00005106 }
5107
Jessica Paquettec191f102018-01-10 18:49:57 +00005108 // Special cases for instructions that can always be outlined, but will fail
5109 // the later tests. E.g. ADRPs are PC-relative, but they can always be
5110 // outlined because they don't require a *specific* value to be in LR.
5111 if (MI.getOpcode() == AArch64::ADRP)
Jessica Paquetteaa087322018-06-04 21:14:16 +00005112 return outliner::InstrType::Legal;
Jessica Paquettec191f102018-01-10 18:49:57 +00005113
Jessica Paquette4aa14db2018-03-28 17:52:31 +00005114 // If MI is a call we might be able to outline it. We don't want to outline
5115 // any calls that rely on the position of items on the stack. When we outline
5116 // something containing a call, we have to emit a save and restore of LR in
5117 // the outlined function. Currently, this always happens by saving LR to the
5118 // stack. Thus, if we outline, say, half the parameters for a function call
5119 // plus the call, then we'll break the callee's expectations for the layout
5120 // of the stack.
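  // For example (illustrative): if the caller sets up one argument before the
  // candidate ("str w9, [sp, #8]") and the candidate itself contains
  //   str w8, [sp]
  //   bl f
  // then the LR spill emitted around the outlined call moves SP between the
  // two stores, and f no longer sees a consistent argument area.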
Eli Friedman042dc9e2018-05-22 19:11:06 +00005121 //
5122 // FIXME: Allow calls to functions which construct a stack frame, as long
5123 // as they don't access arguments on the stack.
5124 // FIXME: Figure out some way to analyze functions defined in other modules.
5125 // We should be able to compute the memory usage based on the IR calling
5126 // convention, even if we can't see the definition.
Jessica Paquette02c124d2017-12-18 19:33:21 +00005127 if (MI.isCall()) {
    // Get the function associated with the call. Look at each operand and find
    // the one that represents the callee and get its name.
    const Function *Callee = nullptr;
    for (const MachineOperand &MOP : MI.operands()) {
      if (MOP.isGlobal()) {
        Callee = dyn_cast<Function>(MOP.getGlobal());
        break;
      }
    }

    // Never outline calls to mcount. There isn't any rule that would require
    // this, but the Linux kernel's "ftrace" feature depends on it.
    if (Callee && Callee->getName() == "\01_mcount")
      return outliner::InstrType::Illegal;

    // If we don't know anything about the callee, assume it depends on the
    // stack layout of the caller. In that case, it's only legal to outline
    // as a tail-call. Whitelist the call instructions we know about so we
    // don't get unexpected results with call pseudo-instructions.
    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;

    if (!Callee)
      return UnknownCallOutlineType;

    // We have a function we have information about. Check if it's something
    // we can safely outline.
    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);

    // We don't know what's going on with the callee at all. Don't touch it.
    if (!CalleeMF)
      return UnknownCallOutlineType;

    // Check if we know anything about the callee-saved registers of the
    // function. If we don't, then don't touch it, since that implies that we
    // haven't computed anything about its stack frame yet.
    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
        MFI.getNumObjects() > 0)
      return UnknownCallOutlineType;

    // At this point, we can say that CalleeMF ought not to pass anything on
    // the stack. Therefore, we can outline it.
    return outliner::InstrType::Legal;
  }

  // Don't outline positions.
  if (MI.isPosition())
    return outliner::InstrType::Illegal;

  // Don't touch the link register or W30.
  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
      MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    return outliner::InstrType::Illegal;

  // Does this use the stack?
  if (MI.modifiesRegister(AArch64::SP, &RI) ||
      MI.readsRegister(AArch64::SP, &RI)) {
    // True if some outlined candidate from this range could require stack
    // fixups. That happens if either
    // * LR is unavailable somewhere in the range (save/restore around call)
    // * The range includes calls (save/restore in outlined frame)
    // holds.
    // FIXME: This is very restrictive; the flags check the whole block,
    // not just the bit we will try to outline.
    bool MightNeedStackFixUp =
        (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
                  MachineOutlinerMBBFlags::HasCalls));

    // If this instruction is in a range where it *never* needs to be fixed
    // up, then we can *always* outline it. This is true even if it's not
    // possible to fix that instruction up.
    //
    // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
    // use SP. Suppose that I1 sits within a range that definitely doesn't
    // need stack fixups, while I2 sits in a range that does.
    //
    // First, I1 can be outlined as long as we *never* fix up the stack in
    // any sequence containing it. I1 is already a safe instruction in the
    // original program, so as long as we don't modify it we're good to go.
    // So this leaves us with showing that outlining I2 won't break our
    // program.
    //
    // Suppose I1 and I2 belong to equivalent candidate sequences. When we
    // look at I2, we need to see if it can be fixed up. Suppose I2 (and
    // thus I1) cannot be fixed up. Then I2 will be assigned a unique
    // integer label; thus, I2 cannot belong to any candidate sequence (a
    // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
    // as well, so we're good. Thus, I1 is always safe to outline.
    //
    // This gives us two things: first off, it buys us some more instructions
    // for our search space by deeming stack instructions illegal only when
    // they can't be fixed up AND we might have to fix them up. Second off,
    // this allows us to catch tricky instructions like, say,
    // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
    // be paired with later SUBXris, which might *not* end up being outlined.
    // If we mess with the stack to save something, and an ADDXri then messes
    // with it *after* that, we aren't going to restore the right something
    // from the stack if we don't also outline the corresponding SUBXri.
    // ADDXris and SUBXris are extremely common in prologue/epilogue code, so
    // supporting them in the outliner can be a pretty big win!
    if (!MightNeedStackFixUp)
      return outliner::InstrType::Legal;

    // Any modification of SP will break our code to save/restore LR.
    // FIXME: We could handle some instructions which add a constant offset to
    // SP, with a bit more work.
    if (MI.modifiesRegister(AArch64::SP, &RI))
      return outliner::InstrType::Illegal;

    // At this point, we have a stack instruction that we might need to fix
    // up. We'll handle it if it's a load or store.
    if (MI.mayLoadOrStore()) {
      unsigned Base;  // Filled with the base register of MI.
      int64_t Offset; // Filled with the offset of MI.
      unsigned DummyWidth;

      // Does it allow us to offset the base register and is the base SP?
      if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
          Base != AArch64::SP)
        return outliner::InstrType::Illegal;

      // Find the minimum/maximum offset for this instruction and check if
      // fixing it up would be in range.
      int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
      unsigned Scale;               // The scale to multiply the offsets by.
      getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);


      // TODO: We should really test what happens if an instruction overflows.
      // This is tricky to test with IR tests, but when the outliner is moved
      // to a MIR test, it really ought to be checked.
      Offset += 16; // Update the offset to what it would be if we outlined.
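      // E.g., LDRXui scales its unsigned 12-bit immediate by 8 (Scale = 8,
      // MinOffset = 0, MaxOffset = 4095), so it can address byte offsets
      // 0..32760 from SP. A load near the top of that range would overflow
      // once the extra 16 bytes for the LR save are added, and must be
      // rejected below.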
      if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
        return outliner::InstrType::Illegal;

      // It's in range, so we can outline it.
      return outliner::InstrType::Legal;
    }

    // FIXME: Add handling for instructions like "add x0, sp, #8".

    // We can't fix it up, so don't outline it.
    return outliner::InstrType::Illegal;
  }

  return outliner::InstrType::Legal;
}

void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
  for (MachineInstr &MI : MBB) {
    unsigned Base, Width;
    int64_t Offset;

    // Is this a load or store with an immediate offset with SP as the base?
    if (!MI.mayLoadOrStore() ||
        !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
        Base != AArch64::SP)
      continue;

    // It is, so we have to fix it up.
    unsigned Scale;
    int64_t Dummy1, Dummy2;

    MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
    assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
    getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
    assert(Scale != 0 && "Unexpected opcode!");

    // We've pushed the return address to the stack, so add 16 to the offset.
    // This is safe, since we already checked if it would overflow when we
    // checked if this instruction was legal to outline.
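    // E.g., "ldr x0, [sp, #8]" has Offset = 8 and Scale = 8, so the new
    // immediate is (8 + 16) / 8 = 3, turning it into "ldr x0, [sp, #24]".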
    int64_t NewImm = (Offset + 16) / Scale;
    StackOffsetOperand.setImm(NewImm);
  }
}

void AArch64InstrInfo::buildOutlinedFrame(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const outliner::TargetCostInfo &TCI) const {
  // For thunk outlining, rewrite the last instruction from a call to a
  // tail-call.
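  // E.g., a trailing "bl callee" becomes a TCRETURNdi pseudo, which is later
  // lowered to a plain branch ("b callee"), so the thunk hands off control
  // without pushing a return address of its own.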
  if (TCI.FrameConstructionID == MachineOutlinerThunk) {
    MachineInstr *Call = &*--MBB.instr_end();
    unsigned TailOpcode;
    if (Call->getOpcode() == AArch64::BL) {
      TailOpcode = AArch64::TCRETURNdi;
    } else {
      assert(Call->getOpcode() == AArch64::BLR);
      TailOpcode = AArch64::TCRETURNri;
    }
    MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
                           .add(Call->getOperand(0))
                           .addImm(0);
    MBB.insert(MBB.end(), TC);
    Call->eraseFromParent();
  }

  // Is there a call in the outlined range?
  auto IsNonTailCall = [](MachineInstr &MI) {
    return MI.isCall() && !MI.isReturn();
  };
  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
    // Fix up the instructions in the range, since we're going to modify the
    // stack.
    assert(TCI.FrameConstructionID != MachineOutlinerDefault &&
           "Can only fix up stack references once");
    fixupPostOutline(MBB);

    // LR has to be a live in so that we can save it.
    MBB.addLiveIn(AArch64::LR);

    MachineBasicBlock::iterator It = MBB.begin();
    MachineBasicBlock::iterator Et = MBB.end();

    if (TCI.FrameConstructionID == MachineOutlinerTailCall ||
        TCI.FrameConstructionID == MachineOutlinerThunk)
      Et = std::prev(MBB.end());

    // Insert a save before the outlined region.
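    // This emits "str x30, [sp, #-16]!": a pre-indexed store that saves LR
    // and allocates 16 bytes of stack in a single instruction (SP must stay
    // 16-byte aligned under AAPCS64).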
    MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
                                .addReg(AArch64::SP, RegState::Define)
                                .addReg(AArch64::LR)
                                .addReg(AArch64::SP)
                                .addImm(-16);
    It = MBB.insert(It, STRXpre);

    const TargetSubtargetInfo &STI = MF.getSubtarget();
    const MCRegisterInfo *MRI = STI.getRegisterInfo();
    unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);

    // Add a CFI saying the stack was moved 16 B down.
    int64_t StackPosEntry =
        MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(StackPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);

    // Add a CFI saying that the LR that we want to find is now 16 B higher
    // than before.
    int64_t LRPosEntry =
        MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(LRPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);

    // Insert a restore before the terminator for the function.
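    // This emits "ldr x30, [sp], #16": a post-indexed load that restores LR
    // and pops the 16 bytes in a single instruction.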
    MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                                 .addReg(AArch64::SP, RegState::Define)
                                 .addReg(AArch64::LR, RegState::Define)
                                 .addReg(AArch64::SP)
                                 .addImm(16);
    Et = MBB.insert(Et, LDRXpost);
  }

  // If this is a tail call outlined function, then there's already a return.
  if (TCI.FrameConstructionID == MachineOutlinerTailCall ||
      TCI.FrameConstructionID == MachineOutlinerThunk)
    return;

  // It's not a tail call, so we have to insert the return ourselves.
  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
                          .addReg(AArch64::LR, RegState::Undef);
  MBB.insert(MBB.end(), ret);

  // Did we have to modify the stack by saving the link register?
  if (TCI.FrameConstructionID == MachineOutlinerNoLRSave)
    return;

  // We modified the stack.
  // Walk over the basic block and fix up all the stack accesses.
  fixupPostOutline(MBB);
}

MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    MachineFunction &MF, const outliner::TargetCostInfo &TCI) const {

  // Are we tail calling?
  if (TCI.CallConstructionID == MachineOutlinerTailCall) {
    // If yes, then we can just branch to the label.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
                            .addGlobalAddress(M.getNamedValue(MF.getName()))
                            .addImm(0));
    return It;
  }

  // Are we saving the link register?
  if (TCI.CallConstructionID == MachineOutlinerNoLRSave ||
      TCI.CallConstructionID == MachineOutlinerThunk) {
    // No, so just insert the call.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                            .addGlobalAddress(M.getNamedValue(MF.getName())));
    return It;
  }

  // We want to return the spot where we inserted the call.
  MachineBasicBlock::iterator CallPt;

  // We have a default call. Save the link register.
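  // As in buildOutlinedFrame, the pair below is "str x30, [sp, #-16]!" /
  // "ldr x30, [sp], #16", bracketing the BL so that its clobbering of LR
  // doesn't lose the caller's own return address.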
  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
                              .addReg(AArch64::SP, RegState::Define)
                              .addReg(AArch64::LR)
                              .addReg(AArch64::SP)
                              .addImm(-16);
  It = MBB.insert(It, STRXpre);
  It++;

  // Insert the call.
  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                          .addGlobalAddress(M.getNamedValue(MF.getName())));
  CallPt = It;
  It++;

  // Restore the link register.
  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                               .addReg(AArch64::SP, RegState::Define)
                               .addReg(AArch64::LR, RegState::Define)
                               .addReg(AArch64::SP)
                               .addImm(16);
  It = MBB.insert(It, LDRXpost);

  return CallPt;
}