//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h" // for the cl::opt flags below
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include <algorithm>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static const MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;

static cl::opt<unsigned>
TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
                    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
                    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                    cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// getInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    return 4;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    return 0;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    return 16;
  }

  llvm_unreachable("getInstSizeInBytes() - Unable to determine insn size");
}

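// parseCondBranch() below builds the condition operand list shared by the
// branch helpers in this file. A leading -1 immediate marks a folded
// compare-and-branch:
//   Bcc:     Cond = { CC }
//   CB[N]Z:  Cond = { -1, Opcode, Reg }
//   TB[N]Z:  Cond = { -1, Opcode, Reg, BitNumber }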
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

static unsigned getBranchMaxDisplacementBytes(unsigned Opc) {
  if (Opc == AArch64::B)
    return -1;

  unsigned Bits = getBranchDisplacementBits(Opc);
  unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2;
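  // For example, TB[N]Z has 14 displacement bits, so the maximal positive
  // reach is ((1 << 13) - 1) << 2 = 32764 bytes (displacements are scaled by
  // the 4-byte instruction size).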

  // Verify the displacement bits options have sane values.
  // XXX: Is there a better place for this?
  assert(MaxOffs >= 8 &&
         "max branch displacement must be enough to jump "
         "over conditional branch expansion");

  return MaxOffs;
}

bool AArch64InstrInfo::isBranchInRange(unsigned BranchOp, uint64_t BrOffset,
                                       uint64_t DestOffset) const {
  unsigned MaxOffs = getBranchMaxDisplacementBytes(BranchOp);

  // Branch is before the destination.
  if (BrOffset <= DestOffset)
    return (DestOffset - BrOffset <= MaxOffs);
  return (BrOffset - DestOffset <= MaxOffs);
}

// Branch analysis.
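// For example (schematically), a block ending in
//   cbz w0, bb.1
//   b bb.2
// is analyzed as TBB = bb.1, FBB = bb.2, Cond = { -1, AArch64::CBZW, w0 }.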
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator left is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

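// For example, reversing a folded { -1, AArch64::TBZW, Reg, BitNumber }
// condition just swaps the opcode to AArch64::TBNZW; the tested register and
// bit number are unchanged.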
bool AArch64InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
                                        const DebugLoc &DL) const {
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);
    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
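//
// For example, if VReg is defined by
//   %vreg1 = ADDWri %vreg0, 1, 0
// a csel selecting %vreg1 can be emitted as CSINCWr on %vreg0 instead, and
// the add is left for dead-code elimination.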
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // Fall through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // Fall through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

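// insertSelect() materializes the Cond encoding produced by parseCondBranch():
// a cbz/cbnz condition is first turned into a SUBS against #0 and a tbz/tbnz
// condition into an ANDS with a single-bit immediate, after which the select
// itself is a csel/fcsel reading NZCV.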
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
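/// For example, 0x00ff00ff is a valid logical immediate (0x00ff replicated in
/// each 16-bit element), so MOVi32imm #0x00ff00ff can become an ORRWri of WZR.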
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  unsigned Imm;

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub with immediate
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
            MI.getOperand(3).getImm() == 0);

  // add/sub with shifted register
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getArithShiftValue(Imm) < 4);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // logical ops on register with shift
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getShiftValue(Imm) < 4 &&
            AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as a MOV.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has the
  // ZeroCycleZeroing feature.
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

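// For example, ldr x0, [x19, #8] and str x1, [x19, #16] share the base
// register x19, and the lower access ends at offset 8 + 8 <= 16, so the two
// accesses are trivially disjoint.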
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: CmpValue is flattened to 0 or 1 here; the real immediate is lost.
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in optimizeCompareInstr.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}


static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertFlagSettingOpcode(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

enum AccessKind {
  AK_Write = 0x01,
  AK_Read = 0x10,
  AK_All = 0x11
};

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(MachineBasicBlock::reverse_iterator(To),
                      To->getParent()->rend(), [From](MachineInstr &MI) {
                        return MachineBasicBlock::iterator(MI) == From;
                      }) != To->getParent()->rend());

  // Iterate backward from \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare
/// instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction. Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands' register classes are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where the immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if its destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get the opcode of the S (flag-setting) version of Instr.
/// If Instr already is an S version, its own opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:    return AArch64::ADDSWrr;
  case AArch64::ADDWri:    return AArch64::ADDSWri;
  case AArch64::ADDXrr:    return AArch64::ADDSXrr;
  case AArch64::ADDXri:    return AArch64::ADDSXri;
  case AArch64::ADCWr:     return AArch64::ADCSWr;
  case AArch64::ADCXr:     return AArch64::ADCSXr;
  case AArch64::SUBWrr:    return AArch64::SUBSWrr;
  case AArch64::SUBWri:    return AArch64::SUBSWri;
  case AArch64::SUBXrr:    return AArch64::SUBSXrr;
  case AArch64::SUBXri:    return AArch64::SUBSXri;
  case AArch64::SBCWr:     return AArch64::SBCSWr;
  case AArch64::SBCXr:     return AArch64::SBCSXr;
  case AArch64::ANDWri:    return AArch64::ANDSWri;
  case AArch64::ANDXri:    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {
struct UsedNZCV {
  bool N;
  bool Z;
  bool C;
  bool V;
  UsedNZCV() : N(false), Z(false), C(false), V(false) {}
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};
} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
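    // HI and LS also depend on the C flag; fall through to set it.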
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
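    // GT and LE also depend on N and V; fall through to set them.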
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted when:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr
///        or if MI opcode is not the S form there must be neither defs of
///        flags nor uses of flags between MI and CmpInstr
/// - and, C/V flags are not used after CmpInstr
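///
/// For example, in (rough MIR)
///   %vreg1 = SUBWrr %vreg0, %vreg2
///   SUBSWri %vreg1, 0, 0      ; cmp %vreg1, #0
///   Bcc NE, <bb#1>
/// the compare can be removed by rewriting the SUBWrr into SUBSWrr, since
/// nothing between the two instructions touches NZCV and the Bcc reads only
/// the Z flag.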
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces the needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands' register classes are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

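// expandPostRAPseudo() currently expands only LOAD_STACK_GUARD. The pseudo's
// memory operand names the stack-guard global; the load of the guard value is
// emitted via the GOT, via a MOVZ/MOVK address sequence (large code model),
// or via an ADRP + LDR page/offset pair, depending on how the global
// reference is classified.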
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}

/// Return true if this instruction has a shifted register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::CRC32Brr:
  case AArch64::CRC32CBrr:
  case AArch64::CRC32CHrr:
  case AArch64::CRC32CWrr:
  case AArch64::CRC32CXrr:
  case AArch64::CRC32Hrr:
  case AArch64::CRC32Wrr:
  case AArch64::CRC32Xrr:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction has an extended register operand with a
/// non-zero extension/shift amount.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}

1338// Return true if this instruction simply sets its single destination register
1339// to zero. This is equivalent to a register rename of the zero-register.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001340bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
1341 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001342 default:
1343 break;
1344 case AArch64::MOVZWi:
1345 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001346 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1347 assert(MI.getDesc().getNumOperands() == 3 &&
1348 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
Tim Northover3b0846e2014-05-24 12:50:23 +00001349 return true;
1350 }
1351 break;
1352 case AArch64::ANDWri: // and Rd, Rzr, #imm
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001353 return MI.getOperand(1).getReg() == AArch64::WZR;
Tim Northover3b0846e2014-05-24 12:50:23 +00001354 case AArch64::ANDXri:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001355 return MI.getOperand(1).getReg() == AArch64::XZR;
Tim Northover3b0846e2014-05-24 12:50:23 +00001356 case TargetOpcode::COPY:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001357 return MI.getOperand(1).getReg() == AArch64::WZR;
Tim Northover3b0846e2014-05-24 12:50:23 +00001358 }
1359 return false;
1360}
1361
1362// Return true if this instruction simply renames a general register without
1363// modifying bits.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001364bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
1365 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001366 default:
1367 break;
1368 case TargetOpcode::COPY: {
1369 // GPR32 copies will be lowered to ORRXrs
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001370 unsigned DstReg = MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001371 return (AArch64::GPR32RegClass.contains(DstReg) ||
1372 AArch64::GPR64RegClass.contains(DstReg));
1373 }
1374 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001375 if (MI.getOperand(1).getReg() == AArch64::XZR) {
1376 assert(MI.getDesc().getNumOperands() == 4 &&
1377 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
Tim Northover3b0846e2014-05-24 12:50:23 +00001378 return true;
1379 }
Renato Golin541d7e72014-08-01 17:27:31 +00001380 break;
Tim Northover3b0846e2014-05-24 12:50:23 +00001381 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001382 if (MI.getOperand(2).getImm() == 0) {
1383 assert(MI.getDesc().getNumOperands() == 4 &&
1384 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
Tim Northover3b0846e2014-05-24 12:50:23 +00001385 return true;
1386 }
Renato Golin541d7e72014-08-01 17:27:31 +00001387 break;
Tim Northover3b0846e2014-05-24 12:50:23 +00001388 }
1389 return false;
1390}
1391
1392// Return true if this instruction simply renames a floating-point register
1393// without modifying bits.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001394bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
1395 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001396 default:
1397 break;
1398 case TargetOpcode::COPY: {
1399 // FPR64 copies will be lowered to ORR.16b
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001400 unsigned DstReg = MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001401 return (AArch64::FPR64RegClass.contains(DstReg) ||
1402 AArch64::FPR128RegClass.contains(DstReg));
1403 }
1404 case AArch64::ORRv16i8:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001405 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1406 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00001407 "invalid ORRv16i8 operands");
1408 return true;
1409 }
Renato Golin541d7e72014-08-01 17:27:31 +00001410 break;
Tim Northover3b0846e2014-05-24 12:50:23 +00001411 }
1412 return false;
1413}
1414
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001415unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
Tim Northover3b0846e2014-05-24 12:50:23 +00001416 int &FrameIndex) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001417 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001418 default:
1419 break;
1420 case AArch64::LDRWui:
1421 case AArch64::LDRXui:
1422 case AArch64::LDRBui:
1423 case AArch64::LDRHui:
1424 case AArch64::LDRSui:
1425 case AArch64::LDRDui:
1426 case AArch64::LDRQui:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001427 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1428 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1429 FrameIndex = MI.getOperand(1).getIndex();
1430 return MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001431 }
1432 break;
1433 }
1434
1435 return 0;
1436}
1437
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001438unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
Tim Northover3b0846e2014-05-24 12:50:23 +00001439 int &FrameIndex) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001440 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001441 default:
1442 break;
1443 case AArch64::STRWui:
1444 case AArch64::STRXui:
1445 case AArch64::STRBui:
1446 case AArch64::STRHui:
1447 case AArch64::STRSui:
1448 case AArch64::STRDui:
1449 case AArch64::STRQui:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001450 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1451 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1452 FrameIndex = MI.getOperand(1).getIndex();
1453 return MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001454 }
1455 break;
1456 }
1457 return 0;
1458}
1459
1460/// Return true if this load/store scales or extends its register offset.
1461/// This refers to scaling a dynamic index as opposed to scaled immediates.
1462/// MI should be a memory op that allows scaled addressing.
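/// (For illustration: "ldr x0, [x1, x2, lsl #3]" returns true, while
/// "ldr x0, [x1, x2]", which neither scales nor extends x2, returns false.)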
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001463bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
1464 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001465 default:
1466 break;
1467 case AArch64::LDRBBroW:
1468 case AArch64::LDRBroW:
1469 case AArch64::LDRDroW:
1470 case AArch64::LDRHHroW:
1471 case AArch64::LDRHroW:
1472 case AArch64::LDRQroW:
1473 case AArch64::LDRSBWroW:
1474 case AArch64::LDRSBXroW:
1475 case AArch64::LDRSHWroW:
1476 case AArch64::LDRSHXroW:
1477 case AArch64::LDRSWroW:
1478 case AArch64::LDRSroW:
1479 case AArch64::LDRWroW:
1480 case AArch64::LDRXroW:
1481 case AArch64::STRBBroW:
1482 case AArch64::STRBroW:
1483 case AArch64::STRDroW:
1484 case AArch64::STRHHroW:
1485 case AArch64::STRHroW:
1486 case AArch64::STRQroW:
1487 case AArch64::STRSroW:
1488 case AArch64::STRWroW:
1489 case AArch64::STRXroW:
1490 case AArch64::LDRBBroX:
1491 case AArch64::LDRBroX:
1492 case AArch64::LDRDroX:
1493 case AArch64::LDRHHroX:
1494 case AArch64::LDRHroX:
1495 case AArch64::LDRQroX:
1496 case AArch64::LDRSBWroX:
1497 case AArch64::LDRSBXroX:
1498 case AArch64::LDRSHWroX:
1499 case AArch64::LDRSHXroX:
1500 case AArch64::LDRSWroX:
1501 case AArch64::LDRSroX:
1502 case AArch64::LDRWroX:
1503 case AArch64::LDRXroX:
1504 case AArch64::STRBBroX:
1505 case AArch64::STRBroX:
1506 case AArch64::STRDroX:
1507 case AArch64::STRHHroX:
1508 case AArch64::STRHroX:
1509 case AArch64::STRQroX:
1510 case AArch64::STRSroX:
1511 case AArch64::STRWroX:
1512 case AArch64::STRXroX:
1513
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001514 unsigned Val = MI.getOperand(3).getImm();
Tim Northover3b0846e2014-05-24 12:50:23 +00001515 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1516 return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1517 }
1518 return false;
1519}
1520
1521/// Check all MachineMemOperands for a hint to suppress pairing.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001522bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
Justin Lebar288b3372016-07-14 18:15:20 +00001523 return any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1524 return MMO->getFlags() & MOSuppressPair;
1525 });
Tim Northover3b0846e2014-05-24 12:50:23 +00001526}
1527
1528/// Set a flag on the first MachineMemOperand to suppress pairing.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001529void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
1530 if (MI.memoperands_empty())
Tim Northover3b0846e2014-05-24 12:50:23 +00001531 return;
Justin Lebar288b3372016-07-14 18:15:20 +00001532 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
Tim Northover3b0846e2014-05-24 12:50:23 +00001533}
1534
Chad Rosiere4e15ba2016-03-09 17:29:48 +00001535bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
1536 switch (Opc) {
1537 default:
1538 return false;
1539 case AArch64::STURSi:
1540 case AArch64::STURDi:
1541 case AArch64::STURQi:
1542 case AArch64::STURBBi:
1543 case AArch64::STURHHi:
1544 case AArch64::STURWi:
1545 case AArch64::STURXi:
1546 case AArch64::LDURSi:
1547 case AArch64::LDURDi:
1548 case AArch64::LDURQi:
1549 case AArch64::LDURWi:
1550 case AArch64::LDURXi:
1551 case AArch64::LDURSWi:
1552 case AArch64::LDURHHi:
1553 case AArch64::LDURBBi:
1554 case AArch64::LDURSBWi:
1555 case AArch64::LDURSHWi:
1556 return true;
1557 }
1558}
1559
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001560bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
1561 return isUnscaledLdSt(MI.getOpcode());
Chad Rosiere4e15ba2016-03-09 17:29:48 +00001562}
1563
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001564// Is this a candidate for ld/st merging or pairing? For example, we don't
1565// touch volatiles or load/stores that have a hint to avoid pair formation.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001566bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001567 // If this is a volatile load/store, don't mess with it.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001568 if (MI.hasOrderedMemoryRef())
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001569 return false;
1570
1571 // Make sure this is a reg+imm (as opposed to an address reloc).
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001572 assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
1573 if (!MI.getOperand(2).isImm())
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001574 return false;
1575
1576 // Can't merge/pair if the instruction modifies the base register.
1577 // e.g., ldr x0, [x0]
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001578 unsigned BaseReg = MI.getOperand(1).getReg();
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001579 const TargetRegisterInfo *TRI = &getRegisterInfo();
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001580 if (MI.modifiesRegister(BaseReg, TRI))
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001581 return false;
1582
1583 // Check if this load/store has a hint to avoid pair formation.
1584 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1585 if (isLdStPairSuppressed(MI))
1586 return false;
1587
Matthias Braun651cff42016-06-02 18:03:53 +00001588 // On some CPUs quad load/store pairs are slower than two single load/stores.
1589 if (Subtarget.avoidQuadLdStPairs()) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001590 switch (MI.getOpcode()) {
Matthias Braunbcfd2362016-05-28 01:06:51 +00001591 default:
1592 break;
Evandro Menezes8d53f882016-04-13 18:31:45 +00001593
Matthias Braunbcfd2362016-05-28 01:06:51 +00001594 case AArch64::LDURQi:
1595 case AArch64::STURQi:
1596 case AArch64::LDRQui:
1597 case AArch64::STRQui:
1598 return false;
Evandro Menezes8d53f882016-04-13 18:31:45 +00001599 }
Matthias Braunbcfd2362016-05-28 01:06:51 +00001600 }
Evandro Menezes8d53f882016-04-13 18:31:45 +00001601
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001602 return true;
1603}
1604
Chad Rosierc27a18f2016-03-09 16:00:35 +00001605bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001606 MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
Chad Rosierc27a18f2016-03-09 16:00:35 +00001607 const TargetRegisterInfo *TRI) const {
Geoff Berry22dfbc52016-08-12 15:26:00 +00001608 unsigned Width;
1609 return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
Tim Northover3b0846e2014-05-24 12:50:23 +00001610}
1611
Sanjoy Dasb666ea32015-06-15 18:44:14 +00001612bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001613 MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
Chad Rosier3528c1e2014-09-08 14:43:48 +00001614 const TargetRegisterInfo *TRI) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001615 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
Chad Rosier3528c1e2014-09-08 14:43:48 +00001616 // Handle only loads/stores with base register followed by immediate offset.
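  // For illustration: "ldr x1, [x0, #8]" (LDRXui) yields BaseReg = x0,
  // Offset = 8 (immediate 1 scaled by 8) and Width = 8.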
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001617 if (LdSt.getNumExplicitOperands() == 3) {
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001618 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001619 if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001620 return false;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001621 } else if (LdSt.getNumExplicitOperands() == 4) {
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001622 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001623 if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
1624 !LdSt.getOperand(3).isImm())
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001625 return false;
1626 } else
Chad Rosier3528c1e2014-09-08 14:43:48 +00001627 return false;
1628
1629 // Offset is calculated as the immediate operand multiplied by the scaling
1630 // factor. Unscaled instructions have scaling factor set to 1.
Chad Rosier0da267d2016-03-09 16:46:48 +00001631 unsigned Scale = 0;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001632 switch (LdSt.getOpcode()) {
Chad Rosier3528c1e2014-09-08 14:43:48 +00001633 default:
1634 return false;
1635 case AArch64::LDURQi:
1636 case AArch64::STURQi:
1637 Width = 16;
1638 Scale = 1;
1639 break;
1640 case AArch64::LDURXi:
1641 case AArch64::LDURDi:
1642 case AArch64::STURXi:
1643 case AArch64::STURDi:
1644 Width = 8;
1645 Scale = 1;
1646 break;
1647 case AArch64::LDURWi:
1648 case AArch64::LDURSi:
1649 case AArch64::LDURSWi:
1650 case AArch64::STURWi:
1651 case AArch64::STURSi:
1652 Width = 4;
1653 Scale = 1;
1654 break;
1655 case AArch64::LDURHi:
1656 case AArch64::LDURHHi:
1657 case AArch64::LDURSHXi:
1658 case AArch64::LDURSHWi:
1659 case AArch64::STURHi:
1660 case AArch64::STURHHi:
1661 Width = 2;
1662 Scale = 1;
1663 break;
1664 case AArch64::LDURBi:
1665 case AArch64::LDURBBi:
1666 case AArch64::LDURSBXi:
1667 case AArch64::LDURSBWi:
1668 case AArch64::STURBi:
1669 case AArch64::STURBBi:
1670 Width = 1;
1671 Scale = 1;
1672 break;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001673 case AArch64::LDPQi:
1674 case AArch64::LDNPQi:
1675 case AArch64::STPQi:
1676 case AArch64::STNPQi:
1677 Scale = 16;
1678 Width = 32;
1679 break;
Chad Rosierd90e2eb2015-09-18 14:15:19 +00001680 case AArch64::LDRQui:
1681 case AArch64::STRQui:
1682 Scale = Width = 16;
1683 break;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001684 case AArch64::LDPXi:
1685 case AArch64::LDPDi:
1686 case AArch64::LDNPXi:
1687 case AArch64::LDNPDi:
1688 case AArch64::STPXi:
1689 case AArch64::STPDi:
1690 case AArch64::STNPXi:
1691 case AArch64::STNPDi:
1692 Scale = 8;
1693 Width = 16;
1694 break;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001695 case AArch64::LDRXui:
Chad Rosier84a0afd2015-09-18 14:13:18 +00001696 case AArch64::LDRDui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00001697 case AArch64::STRXui:
Chad Rosier84a0afd2015-09-18 14:13:18 +00001698 case AArch64::STRDui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00001699 Scale = Width = 8;
1700 break;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001701 case AArch64::LDPWi:
1702 case AArch64::LDPSi:
1703 case AArch64::LDNPWi:
1704 case AArch64::LDNPSi:
1705 case AArch64::STPWi:
1706 case AArch64::STPSi:
1707 case AArch64::STNPWi:
1708 case AArch64::STNPSi:
1709 Scale = 4;
1710 Width = 8;
1711 break;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001712 case AArch64::LDRWui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00001713 case AArch64::LDRSui:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001714 case AArch64::LDRSWui:
Chad Rosier84a0afd2015-09-18 14:13:18 +00001715 case AArch64::STRWui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00001716 case AArch64::STRSui:
1717 Scale = Width = 4;
1718 break;
Chad Rosier84a0afd2015-09-18 14:13:18 +00001719 case AArch64::LDRHui:
1720 case AArch64::LDRHHui:
1721 case AArch64::STRHui:
1722 case AArch64::STRHHui:
1723 Scale = Width = 2;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001724 break;
Chad Rosierd90e2eb2015-09-18 14:15:19 +00001725 case AArch64::LDRBui:
1726 case AArch64::LDRBBui:
1727 case AArch64::STRBui:
1728 case AArch64::STRBBui:
1729 Scale = Width = 1;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001730 break;
Chad Rosier064261d2016-02-01 20:54:36 +00001731 }
Chad Rosier3528c1e2014-09-08 14:43:48 +00001732
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001733 if (LdSt.getNumExplicitOperands() == 3) {
1734 BaseReg = LdSt.getOperand(1).getReg();
1735 Offset = LdSt.getOperand(2).getImm() * Scale;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001736 } else {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001737 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
1738 BaseReg = LdSt.getOperand(2).getReg();
1739 Offset = LdSt.getOperand(3).getImm() * Scale;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001740 }
Chad Rosier3528c1e2014-09-08 14:43:48 +00001741 return true;
1742}
1743
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001744// Scale the unscaled offset. Returns false if the unscaled offset can't be
1745// scaled.
1746static bool scaleOffset(unsigned Opc, int64_t &Offset) {
1747 unsigned OffsetStride = 1;
1748 switch (Opc) {
1749 default:
1750 return false;
1751 case AArch64::LDURQi:
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001752 case AArch64::STURQi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001753 OffsetStride = 16;
1754 break;
1755 case AArch64::LDURXi:
1756 case AArch64::LDURDi:
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001757 case AArch64::STURXi:
1758 case AArch64::STURDi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001759 OffsetStride = 8;
1760 break;
1761 case AArch64::LDURWi:
1762 case AArch64::LDURSi:
1763 case AArch64::LDURSWi:
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001764 case AArch64::STURWi:
1765 case AArch64::STURSi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001766 OffsetStride = 4;
1767 break;
1768 }
1769 // If the byte-offset isn't a multiple of the stride, we can't scale this
1770 // offset.
1771 if (Offset % OffsetStride != 0)
1772 return false;
1773
1774 // Convert the byte-offset used by unscaled into an "element" offset used
1775 // by the scaled pair load/store instructions.
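  // For example, an LDURXi byte offset of 16 (stride 8) becomes the element
  // offset 2 that the paired LDPXi form expects.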
1776 Offset /= OffsetStride;
1777 return true;
1778}
1779
1780static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
1781 if (FirstOpc == SecondOpc)
1782 return true;
1783 // We can also pair sign-ext and zero-ext instructions.
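  // For example, an LDRWui may be considered for pairing with an LDRSWui at
  // the adjacent offset.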
1784 switch (FirstOpc) {
1785 default:
1786 return false;
1787 case AArch64::LDRWui:
1788 case AArch64::LDURWi:
1789 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
1790 case AArch64::LDRSWui:
1791 case AArch64::LDURSWi:
1792 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
1793 }
1794 // These instructions can't be paired based on their opcodes.
1795 return false;
1796}
1797
Tim Northover3b0846e2014-05-24 12:50:23 +00001798/// Detect opportunities for ldp/stp formation.
1799///
Sanjoy Dasb666ea32015-06-15 18:44:14 +00001800/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
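/// (For illustration: clustering "ldr x0, [x10]" with "ldr x1, [x10, #8]"
/// lets a later pass rewrite the pair as "ldp x0, x1, [x10]".)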
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001801bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
1802 MachineInstr &SecondLdSt,
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001803 unsigned NumLoads) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00001804 // Only cluster up to a single pair.
1805 if (NumLoads > 1)
1806 return false;
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001807
Geoff Berry22dfbc52016-08-12 15:26:00 +00001808 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
1809 return false;
1810
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001811 // Can we pair these instructions based on their opcodes?
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001812 unsigned FirstOpc = FirstLdSt.getOpcode();
1813 unsigned SecondOpc = SecondLdSt.getOpcode();
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001814 if (!canPairLdStOpc(FirstOpc, SecondOpc))
Tim Northover3b0846e2014-05-24 12:50:23 +00001815 return false;
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001816
1817 // Can't merge volatiles or load/stores that have a hint to avoid pair
1818 // formation, for example.
1819 if (!isCandidateToMergeOrPair(FirstLdSt) ||
1820 !isCandidateToMergeOrPair(SecondLdSt))
Tim Northover3b0846e2014-05-24 12:50:23 +00001821 return false;
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001822
1823 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001824 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001825 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
1826 return false;
1827
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001828 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001829 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
1830 return false;
1831
1832 // Pairwise instructions have a 7-bit signed offset field.
1833 if (Offset1 > 63 || Offset1 < -64)
1834 return false;
1835
Tim Northover3b0846e2014-05-24 12:50:23 +00001836 // The caller should already have ordered First/SecondLdSt by offset.
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001837 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
1838 return Offset1 + 1 == Offset2;
Tim Northover3b0846e2014-05-24 12:50:23 +00001839}
1840
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001841bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
1842 MachineInstr &Second) const {
Matthias Braun651cff42016-06-02 18:03:53 +00001843 if (Subtarget.hasMacroOpFusion()) {
1844 // Fuse CMN, CMP, TST followed by Bcc.
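    // For example, "subs wzr, w0, #4" (i.e., "cmp w0, #4") followed by
    // "b.eq label".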
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001845 unsigned SecondOpcode = Second.getOpcode();
Matthias Braunc8b67e62015-07-20 23:11:42 +00001846 if (SecondOpcode == AArch64::Bcc) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001847 switch (First.getOpcode()) {
Matthias Braunc8b67e62015-07-20 23:11:42 +00001848 default:
1849 return false;
1850 case AArch64::SUBSWri:
1851 case AArch64::ADDSWri:
1852 case AArch64::ANDSWri:
1853 case AArch64::SUBSXri:
1854 case AArch64::ADDSXri:
1855 case AArch64::ANDSXri:
1856 return true;
1857 }
Matthias Braune536f4f2015-07-20 22:34:47 +00001858 }
Matthias Braun651cff42016-06-02 18:03:53 +00001859 // Fuse ALU operations followed by CBZ/CBNZ.
Matthias Braunc8b67e62015-07-20 23:11:42 +00001860 if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
1861 SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001862 switch (First.getOpcode()) {
Matthias Braunc8b67e62015-07-20 23:11:42 +00001863 default:
1864 return false;
1865 case AArch64::ADDWri:
1866 case AArch64::ADDXri:
1867 case AArch64::ANDWri:
1868 case AArch64::ANDXri:
1869 case AArch64::EORWri:
1870 case AArch64::EORXri:
1871 case AArch64::ORRWri:
1872 case AArch64::ORRXri:
1873 case AArch64::SUBWri:
1874 case AArch64::SUBXri:
1875 return true;
1876 }
Matthias Braune536f4f2015-07-20 22:34:47 +00001877 }
1878 }
1879 return false;
Tim Northover3b0846e2014-05-24 12:50:23 +00001880}
1881
Adrian Prantl87b7eb92014-10-01 18:55:02 +00001882MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
1883 MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001884 const MDNode *Expr, const DebugLoc &DL) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00001885 MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
1886 .addFrameIndex(FrameIx)
1887 .addImm(0)
1888 .addImm(Offset)
Adrian Prantl87b7eb92014-10-01 18:55:02 +00001889 .addMetadata(Var)
1890 .addMetadata(Expr);
Tim Northover3b0846e2014-05-24 12:50:23 +00001891 return &*MIB;
1892}
1893
1894static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
1895 unsigned Reg, unsigned SubIdx,
1896 unsigned State,
1897 const TargetRegisterInfo *TRI) {
1898 if (!SubIdx)
1899 return MIB.addReg(Reg, State);
1900
1901 if (TargetRegisterInfo::isPhysicalRegister(Reg))
1902 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1903 return MIB.addReg(Reg, State, SubIdx);
1904}
1905
1906static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
1907 unsigned NumRegs) {
1908 // We really want the positive remainder mod 32 here; that happens to be
1909 // easily obtainable with a mask.
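  // For example, a forward copy of Q1_Q2 into Q2_Q3 would overwrite Q2 before
  // it is read, so copyPhysRegTuple copies such tuples in reverse order.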
1910 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
1911}
1912
1913void AArch64InstrInfo::copyPhysRegTuple(
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001914 MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
Tim Northover3b0846e2014-05-24 12:50:23 +00001915 unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
1916 llvm::ArrayRef<unsigned> Indices) const {
Eric Christopher58f32662014-06-10 22:57:21 +00001917 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00001918 "Unexpected register copy without NEON");
Eric Christophera0de2532015-03-18 20:37:30 +00001919 const TargetRegisterInfo *TRI = &getRegisterInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00001920 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
1921 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
1922 unsigned NumRegs = Indices.size();
1923
1924 int SubReg = 0, End = NumRegs, Incr = 1;
1925 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
1926 SubReg = NumRegs - 1;
1927 End = -1;
1928 Incr = -1;
1929 }
1930
1931 for (; SubReg != End; SubReg += Incr) {
James Molloyf8aa57a2015-04-16 11:37:40 +00001932 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
Tim Northover3b0846e2014-05-24 12:50:23 +00001933 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
1934 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
1935 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
1936 }
1937}
1938
1939void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001940 MachineBasicBlock::iterator I,
1941 const DebugLoc &DL, unsigned DestReg,
1942 unsigned SrcReg, bool KillSrc) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00001943 if (AArch64::GPR32spRegClass.contains(DestReg) &&
1944 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
Eric Christophera0de2532015-03-18 20:37:30 +00001945 const TargetRegisterInfo *TRI = &getRegisterInfo();
1946
Tim Northover3b0846e2014-05-24 12:50:23 +00001947 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
1948 // If either operand is WSP, expand to ADD #0.
1949 if (Subtarget.hasZeroCycleRegMove()) {
1950 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
1951 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1952 &AArch64::GPR64spRegClass);
1953 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1954 &AArch64::GPR64spRegClass);
1955 // This instruction is reading and writing X registers. This may upset
1956 // the register scavenger and machine verifier, so we need to indicate
1957 // that we are reading an undefined value from SrcRegX, but a proper
1958 // value from SrcReg.
1959 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
1960 .addReg(SrcRegX, RegState::Undef)
1961 .addImm(0)
1962 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
1963 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1964 } else {
1965 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
1966 .addReg(SrcReg, getKillRegState(KillSrc))
1967 .addImm(0)
1968 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1969 }
1970 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
1971 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
1972 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1973 } else {
1974 if (Subtarget.hasZeroCycleRegMove()) {
1975 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
1976 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1977 &AArch64::GPR64spRegClass);
1978 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1979 &AArch64::GPR64spRegClass);
1980 // This instruction is reading and writing X registers. This may upset
1981 // the register scavenger and machine verifier, so we need to indicate
1982 // that we are reading an undefined value from SrcRegX, but a proper
1983 // value from SrcReg.
1984 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
1985 .addReg(AArch64::XZR)
1986 .addReg(SrcRegX, RegState::Undef)
1987 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1988 } else {
1989 // Otherwise, expand to ORR WZR.
1990 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
1991 .addReg(AArch64::WZR)
1992 .addReg(SrcReg, getKillRegState(KillSrc));
1993 }
1994 }
1995 return;
1996 }
1997
1998 if (AArch64::GPR64spRegClass.contains(DestReg) &&
1999 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2000 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2001 // If either operand is SP, expand to ADD #0.
2002 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2003 .addReg(SrcReg, getKillRegState(KillSrc))
2004 .addImm(0)
2005 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2006 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
2007 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
2008 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2009 } else {
2010 // Otherwise, expand to ORR XZR.
2011 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2012 .addReg(AArch64::XZR)
2013 .addReg(SrcReg, getKillRegState(KillSrc));
2014 }
2015 return;
2016 }
2017
2018 // Copy a DDDD register quad by copying the individual sub-registers.
2019 if (AArch64::DDDDRegClass.contains(DestReg) &&
2020 AArch64::DDDDRegClass.contains(SrcReg)) {
2021 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
2022 AArch64::dsub2, AArch64::dsub3 };
2023 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2024 Indices);
2025 return;
2026 }
2027
2028 // Copy a DDD register triple by copying the individual sub-registers.
2029 if (AArch64::DDDRegClass.contains(DestReg) &&
2030 AArch64::DDDRegClass.contains(SrcReg)) {
2031 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
2032 AArch64::dsub2 };
2033 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2034 Indices);
2035 return;
2036 }
2037
2038 // Copy a DD register pair by copying the individual sub-registers.
2039 if (AArch64::DDRegClass.contains(DestReg) &&
2040 AArch64::DDRegClass.contains(SrcReg)) {
2041 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
2042 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2043 Indices);
2044 return;
2045 }
2046
2047 // Copy a QQQQ register quad by copying the individual sub-registers.
2048 if (AArch64::QQQQRegClass.contains(DestReg) &&
2049 AArch64::QQQQRegClass.contains(SrcReg)) {
2050 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
2051 AArch64::qsub2, AArch64::qsub3 };
2052 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2053 Indices);
2054 return;
2055 }
2056
2057 // Copy a QQQ register triple by copying the individual sub-registers.
2058 if (AArch64::QQQRegClass.contains(DestReg) &&
2059 AArch64::QQQRegClass.contains(SrcReg)) {
2060 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
2061 AArch64::qsub2 };
2062 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2063 Indices);
2064 return;
2065 }
2066
2067 // Copy a QQ register pair by copying the individual sub-registers.
2068 if (AArch64::QQRegClass.contains(DestReg) &&
2069 AArch64::QQRegClass.contains(SrcReg)) {
2070 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
2071 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2072 Indices);
2073 return;
2074 }
2075
2076 if (AArch64::FPR128RegClass.contains(DestReg) &&
2077 AArch64::FPR128RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002078 if(Subtarget.hasNEON()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00002079 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2080 .addReg(SrcReg)
2081 .addReg(SrcReg, getKillRegState(KillSrc));
2082 } else {
2083 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2084 .addReg(AArch64::SP, RegState::Define)
2085 .addReg(SrcReg, getKillRegState(KillSrc))
2086 .addReg(AArch64::SP)
2087 .addImm(-16);
2088 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2089 .addReg(AArch64::SP, RegState::Define)
2090 .addReg(DestReg, RegState::Define)
2091 .addReg(AArch64::SP)
2092 .addImm(16);
2093 }
2094 return;
2095 }
2096
2097 if (AArch64::FPR64RegClass.contains(DestReg) &&
2098 AArch64::FPR64RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002099 if(Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002100 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2101 &AArch64::FPR128RegClass);
2102 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2103 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002104 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2105 .addReg(SrcReg)
2106 .addReg(SrcReg, getKillRegState(KillSrc));
2107 } else {
2108 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2109 .addReg(SrcReg, getKillRegState(KillSrc));
2110 }
2111 return;
2112 }
2113
2114 if (AArch64::FPR32RegClass.contains(DestReg) &&
2115 AArch64::FPR32RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002116 if(Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002117 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2118 &AArch64::FPR128RegClass);
2119 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2120 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002121 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2122 .addReg(SrcReg)
2123 .addReg(SrcReg, getKillRegState(KillSrc));
2124 } else {
2125 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2126 .addReg(SrcReg, getKillRegState(KillSrc));
2127 }
2128 return;
2129 }
2130
2131 if (AArch64::FPR16RegClass.contains(DestReg) &&
2132 AArch64::FPR16RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002133 if(Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002134 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2135 &AArch64::FPR128RegClass);
2136 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2137 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002138 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2139 .addReg(SrcReg)
2140 .addReg(SrcReg, getKillRegState(KillSrc));
2141 } else {
Eric Christophera0de2532015-03-18 20:37:30 +00002142 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2143 &AArch64::FPR32RegClass);
2144 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2145 &AArch64::FPR32RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002146 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2147 .addReg(SrcReg, getKillRegState(KillSrc));
2148 }
2149 return;
2150 }
2151
2152 if (AArch64::FPR8RegClass.contains(DestReg) &&
2153 AArch64::FPR8RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002154 if(Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002155 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
Tim Northover3b0846e2014-05-24 12:50:23 +00002156 &AArch64::FPR128RegClass);
Eric Christophera0de2532015-03-18 20:37:30 +00002157 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2158 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002159 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2160 .addReg(SrcReg)
2161 .addReg(SrcReg, getKillRegState(KillSrc));
2162 } else {
Eric Christophera0de2532015-03-18 20:37:30 +00002163 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2164 &AArch64::FPR32RegClass);
2165 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2166 &AArch64::FPR32RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002167 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2168 .addReg(SrcReg, getKillRegState(KillSrc));
2169 }
2170 return;
2171 }
2172
2173 // Copies between GPR64 and FPR64.
2174 if (AArch64::FPR64RegClass.contains(DestReg) &&
2175 AArch64::GPR64RegClass.contains(SrcReg)) {
2176 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2177 .addReg(SrcReg, getKillRegState(KillSrc));
2178 return;
2179 }
2180 if (AArch64::GPR64RegClass.contains(DestReg) &&
2181 AArch64::FPR64RegClass.contains(SrcReg)) {
2182 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2183 .addReg(SrcReg, getKillRegState(KillSrc));
2184 return;
2185 }
2186 // Copies between GPR32 and FPR32.
2187 if (AArch64::FPR32RegClass.contains(DestReg) &&
2188 AArch64::GPR32RegClass.contains(SrcReg)) {
2189 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2190 .addReg(SrcReg, getKillRegState(KillSrc));
2191 return;
2192 }
2193 if (AArch64::GPR32RegClass.contains(DestReg) &&
2194 AArch64::FPR32RegClass.contains(SrcReg)) {
2195 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2196 .addReg(SrcReg, getKillRegState(KillSrc));
2197 return;
2198 }
2199
Tim Northover1bed9af2014-05-27 12:16:02 +00002200 if (DestReg == AArch64::NZCV) {
2201 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2202 BuildMI(MBB, I, DL, get(AArch64::MSR))
2203 .addImm(AArch64SysReg::NZCV)
2204 .addReg(SrcReg, getKillRegState(KillSrc))
2205 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2206 return;
2207 }
2208
2209 if (SrcReg == AArch64::NZCV) {
2210 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
Quentin Colombet658d9db2016-04-22 18:46:17 +00002211 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
Tim Northover1bed9af2014-05-27 12:16:02 +00002212 .addImm(AArch64SysReg::NZCV)
2213 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2214 return;
2215 }
2216
2217 llvm_unreachable("unimplemented reg-to-reg copy");
Tim Northover3b0846e2014-05-24 12:50:23 +00002218}
2219
2220void AArch64InstrInfo::storeRegToStackSlot(
2221 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2222 bool isKill, int FI, const TargetRegisterClass *RC,
2223 const TargetRegisterInfo *TRI) const {
2224 DebugLoc DL;
2225 if (MBBI != MBB.end())
2226 DL = MBBI->getDebugLoc();
2227 MachineFunction &MF = *MBB.getParent();
Matthias Braun941a7052016-07-28 18:40:00 +00002228 MachineFrameInfo &MFI = MF.getFrameInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00002229 unsigned Align = MFI.getObjectAlignment(FI);
2230
Alex Lorenze40c8a22015-08-11 23:09:45 +00002231 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
Tim Northover3b0846e2014-05-24 12:50:23 +00002232 MachineMemOperand *MMO = MF.getMachineMemOperand(
2233 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2234 unsigned Opc = 0;
2235 bool Offset = true;
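  // Pick the store opcode from the spill size in bytes; register tuples are
  // spilled with ST1 variants, which take no immediate offset (Offset = false).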
2236 switch (RC->getSize()) {
2237 case 1:
2238 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2239 Opc = AArch64::STRBui;
2240 break;
2241 case 2:
2242 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2243 Opc = AArch64::STRHui;
2244 break;
2245 case 4:
2246 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2247 Opc = AArch64::STRWui;
2248 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2249 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2250 else
2251 assert(SrcReg != AArch64::WSP);
2252 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2253 Opc = AArch64::STRSui;
2254 break;
2255 case 8:
2256 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2257 Opc = AArch64::STRXui;
2258 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2259 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2260 else
2261 assert(SrcReg != AArch64::SP);
2262 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2263 Opc = AArch64::STRDui;
2264 break;
2265 case 16:
2266 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2267 Opc = AArch64::STRQui;
2268 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002269 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002270 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002271 Opc = AArch64::ST1Twov1d;
2272 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002273 }
2274 break;
2275 case 24:
2276 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002277 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002278 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002279 Opc = AArch64::ST1Threev1d;
2280 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002281 }
2282 break;
2283 case 32:
2284 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002285 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002286 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002287 Opc = AArch64::ST1Fourv1d;
2288 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002289 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002290 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002291 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002292 Opc = AArch64::ST1Twov2d;
2293 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002294 }
2295 break;
2296 case 48:
2297 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002298 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002299 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002300 Opc = AArch64::ST1Threev2d;
2301 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002302 }
2303 break;
2304 case 64:
2305 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002306 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002307 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002308 Opc = AArch64::ST1Fourv2d;
2309 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002310 }
2311 break;
2312 }
2313 assert(Opc && "Unknown register class");
2314
James Molloyf8aa57a2015-04-16 11:37:40 +00002315 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
Tim Northover3b0846e2014-05-24 12:50:23 +00002316 .addReg(SrcReg, getKillRegState(isKill))
2317 .addFrameIndex(FI);
2318
2319 if (Offset)
2320 MI.addImm(0);
2321 MI.addMemOperand(MMO);
2322}
2323
2324void AArch64InstrInfo::loadRegFromStackSlot(
2325 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2326 int FI, const TargetRegisterClass *RC,
2327 const TargetRegisterInfo *TRI) const {
2328 DebugLoc DL;
2329 if (MBBI != MBB.end())
2330 DL = MBBI->getDebugLoc();
2331 MachineFunction &MF = *MBB.getParent();
Matthias Braun941a7052016-07-28 18:40:00 +00002332 MachineFrameInfo &MFI = MF.getFrameInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00002333 unsigned Align = MFI.getObjectAlignment(FI);
Alex Lorenze40c8a22015-08-11 23:09:45 +00002334 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
Tim Northover3b0846e2014-05-24 12:50:23 +00002335 MachineMemOperand *MMO = MF.getMachineMemOperand(
2336 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2337
2338 unsigned Opc = 0;
2339 bool Offset = true;
2340 switch (RC->getSize()) {
2341 case 1:
2342 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2343 Opc = AArch64::LDRBui;
2344 break;
2345 case 2:
2346 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2347 Opc = AArch64::LDRHui;
2348 break;
2349 case 4:
2350 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2351 Opc = AArch64::LDRWui;
2352 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2353 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2354 else
2355 assert(DestReg != AArch64::WSP);
2356 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2357 Opc = AArch64::LDRSui;
2358 break;
2359 case 8:
2360 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2361 Opc = AArch64::LDRXui;
2362 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2363 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2364 else
2365 assert(DestReg != AArch64::SP);
2366 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2367 Opc = AArch64::LDRDui;
2368 break;
2369 case 16:
2370 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2371 Opc = AArch64::LDRQui;
2372 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002373 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002374 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002375 Opc = AArch64::LD1Twov1d;
2376 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002377 }
2378 break;
2379 case 24:
2380 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002381 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002382 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002383 Opc = AArch64::LD1Threev1d;
2384 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002385 }
2386 break;
2387 case 32:
2388 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002389 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002390 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002391 Opc = AArch64::LD1Fourv1d;
2392 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002393 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002394 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002395 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002396 Opc = AArch64::LD1Twov2d;
2397 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002398 }
2399 break;
2400 case 48:
2401 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002402 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002403 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002404 Opc = AArch64::LD1Threev2d;
2405 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002406 }
2407 break;
2408 case 64:
2409 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002410 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002411 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002412 Opc = AArch64::LD1Fourv2d;
2413 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002414 }
2415 break;
2416 }
2417 assert(Opc && "Unknown register class");
2418
James Molloyf8aa57a2015-04-16 11:37:40 +00002419 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
Tim Northover3b0846e2014-05-24 12:50:23 +00002420 .addReg(DestReg, getDefRegState(true))
2421 .addFrameIndex(FI);
2422 if (Offset)
2423 MI.addImm(0);
2424 MI.addMemOperand(MMO);
2425}
2426
2427void llvm::emitFrameOffset(MachineBasicBlock &MBB,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00002428 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
Tim Northover3b0846e2014-05-24 12:50:23 +00002429 unsigned DestReg, unsigned SrcReg, int Offset,
Eric Christopherbc76b972014-06-10 17:33:39 +00002430 const TargetInstrInfo *TII,
Tim Northover3b0846e2014-05-24 12:50:23 +00002431 MachineInstr::MIFlag Flag, bool SetNZCV) {
2432 if (DestReg == SrcReg && Offset == 0)
2433 return;
2434
Geoff Berrya5335642016-05-06 16:34:59 +00002435 assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2436 "SP increment/decrement not 16-byte aligned");
2437
Tim Northover3b0846e2014-05-24 12:50:23 +00002438 bool isSub = Offset < 0;
2439 if (isSub)
2440 Offset = -Offset;
2441
2442 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2443 // scratch register. If DestReg is a virtual register, use it as the
2444 // scratch register; otherwise, create a new virtual register (to be
2445 // replaced by the scavenger at the end of PEI). That case can be optimized
2446 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2447 // register can be loaded with offset%8 and the add/sub can use an extending
2448 // instruction with LSL#3.
2449 // Currently the function handles any offsets but generates a poor sequence
2450 // of code.
2451 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2452
2453 unsigned Opc;
2454 if (SetNZCV)
2455 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2456 else
2457 Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2458 const unsigned MaxEncoding = 0xfff;
2459 const unsigned ShiftSize = 12;
2460 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
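  // Emit the offset in 12-bit chunks, high chunk first. For example,
  // Offset == 0x12345 becomes "add Xd, Xn, #0x12, lsl #12" followed by
  // "add Xd, Xd, #0x345".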
2461 while (((unsigned)Offset) >= (1 << ShiftSize)) {
2462 unsigned ThisVal;
2463 if (((unsigned)Offset) > MaxEncodableValue) {
2464 ThisVal = MaxEncodableValue;
2465 } else {
2466 ThisVal = Offset & MaxEncodableValue;
2467 }
2468 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2469 "Encoding cannot handle value that big");
2470 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2471 .addReg(SrcReg)
2472 .addImm(ThisVal >> ShiftSize)
2473 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2474 .setMIFlag(Flag);
2475
2476 SrcReg = DestReg;
2477 Offset -= ThisVal;
2478 if (Offset == 0)
2479 return;
2480 }
2481 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2482 .addReg(SrcReg)
2483 .addImm(Offset)
2484 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2485 .setMIFlag(Flag);
2486}
2487
Keno Fischere70b31f2015-06-08 20:09:58 +00002488MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002489 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
Jonas Paulsson8e5b0c62016-05-10 08:09:37 +00002490 MachineBasicBlock::iterator InsertPt, int FrameIndex,
2491 LiveIntervals *LIS) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00002492 // This is a bit of a hack. Consider this instruction:
2493 //
2494 // %vreg0<def> = COPY %SP; GPR64all:%vreg0
2495 //
2496 // We explicitly chose GPR64all for the virtual register so such a copy might
2497 // be eliminated by RegisterCoalescer. However, that may not be possible, and
2498 // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
2499 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2500 //
2501 // To prevent that, we are going to constrain the %vreg0 register class here.
2502 //
2503 // <rdar://problem/11522048>
2504 //
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002505 if (MI.isCopy()) {
2506 unsigned DstReg = MI.getOperand(0).getReg();
2507 unsigned SrcReg = MI.getOperand(1).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00002508 if (SrcReg == AArch64::SP &&
2509 TargetRegisterInfo::isVirtualRegister(DstReg)) {
2510 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2511 return nullptr;
2512 }
2513 if (DstReg == AArch64::SP &&
2514 TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2515 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2516 return nullptr;
2517 }
2518 }
2519
2520 // Cannot fold.
2521 return nullptr;
2522}
2523
2524int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
2525 bool *OutUseUnscaledOp,
2526 unsigned *OutUnscaledOp,
2527 int *EmittableOffset) {
2528 int Scale = 1;
2529 bool IsSigned = false;
2530 // The ImmIdx should be changed case by case if it is not 2.
2531 unsigned ImmIdx = 2;
2532 unsigned UnscaledOp = 0;
2533 // Set output values in case of early exit.
2534 if (EmittableOffset)
2535 *EmittableOffset = 0;
2536 if (OutUseUnscaledOp)
2537 *OutUseUnscaledOp = false;
2538 if (OutUnscaledOp)
2539 *OutUnscaledOp = 0;
2540 switch (MI.getOpcode()) {
2541 default:
Craig Topper2a30d782014-06-18 05:05:13 +00002542 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
Tim Northover3b0846e2014-05-24 12:50:23 +00002543 // Vector spills/fills can't take an immediate offset.
2544 case AArch64::LD1Twov2d:
2545 case AArch64::LD1Threev2d:
2546 case AArch64::LD1Fourv2d:
2547 case AArch64::LD1Twov1d:
2548 case AArch64::LD1Threev1d:
2549 case AArch64::LD1Fourv1d:
2550 case AArch64::ST1Twov2d:
2551 case AArch64::ST1Threev2d:
2552 case AArch64::ST1Fourv2d:
2553 case AArch64::ST1Twov1d:
2554 case AArch64::ST1Threev1d:
2555 case AArch64::ST1Fourv1d:
2556 return AArch64FrameOffsetCannotUpdate;
2557 case AArch64::PRFMui:
2558 Scale = 8;
2559 UnscaledOp = AArch64::PRFUMi;
2560 break;
2561 case AArch64::LDRXui:
    Scale = 8;
    UnscaledOp = AArch64::LDURXi;
    break;
  case AArch64::LDRWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURWi;
    break;
  case AArch64::LDRBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBi;
    break;
  case AArch64::LDRHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHi;
    break;
  case AArch64::LDRSui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSi;
    break;
  case AArch64::LDRDui:
    Scale = 8;
    UnscaledOp = AArch64::LDURDi;
    break;
  case AArch64::LDRQui:
    Scale = 16;
    UnscaledOp = AArch64::LDURQi;
    break;
  case AArch64::LDRBBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBBi;
    break;
  case AArch64::LDRHHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHHi;
    break;
  case AArch64::LDRSBXui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBXi;
    break;
  case AArch64::LDRSBWui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBWi;
    break;
  case AArch64::LDRSHXui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHXi;
    break;
  case AArch64::LDRSHWui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHWi;
    break;
  case AArch64::LDRSWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSWi;
    break;

  case AArch64::STRXui:
    Scale = 8;
    UnscaledOp = AArch64::STURXi;
    break;
  case AArch64::STRWui:
    Scale = 4;
    UnscaledOp = AArch64::STURWi;
    break;
  case AArch64::STRBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBi;
    break;
  case AArch64::STRHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHi;
    break;
  case AArch64::STRSui:
    Scale = 4;
    UnscaledOp = AArch64::STURSi;
    break;
  case AArch64::STRDui:
    Scale = 8;
    UnscaledOp = AArch64::STURDi;
    break;
  case AArch64::STRQui:
    Scale = 16;
    UnscaledOp = AArch64::STURQi;
    break;
  case AArch64::STRBBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBBi;
    break;
  case AArch64::STRHHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHHi;
    break;

  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 8;
    break;
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::LDNPQi:
  case AArch64::STNPQi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 16;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    ImmIdx = 3;
    IsSigned = true;
    Scale = 4;
    break;

  case AArch64::LDURXi:
  case AArch64::LDURWi:
  case AArch64::LDURBi:
  case AArch64::LDURHi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSWi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::STURBi:
  case AArch64::STURHi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
    Scale = 1;
    break;
  }

  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset (and have an unscaled op to use).
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Use an unscaled addressing mode if the instruction has a negative offset
  // (or if the instruction is already using an unscaled addressing mode).
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }

  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}
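
// Worked example (illustrative only, values chosen for exposition): for
// LDRXui the immediate is an unsigned 12-bit field in 8-byte units, so
// MaskBits = 12, Scale = 8, and MaxOff = 4095. A byte offset of 32776
// scales to 4097, which is out of range: the function reports an emittable
// offset of 4095 and returns a 16-byte remainder for the caller to fold
// into the base register. A byte offset of 12 is not a multiple of 8, so
// the load is rewritten to the unscaled LDURXi, whose signed 9-bit field
// (-256..255) encodes 12 directly.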

bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}
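
// Usage sketch (hypothetical caller, not from this file): eliminating a
// frame index during frame lowering. The return value only says whether
// the instruction was fully rewritten; any residual offset is left in
// Offset for the caller to materialize, e.g. with emitFrameOffset():
//
//   int Offset = ComputedFrameObjectOffset; // assumed to be in bytes
//   if (!rewriteAArch64FrameIndex(MI, FIOperandNum, AArch64::SP, Offset,
//                                 TII)) {
//     // Offset now holds the part that did not fit in the immediate.
//   }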

void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::createImm(0));
}

// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const {
  return true;
}
//
// True when Opc sets the NZCV flags.
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}
//
// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}
//
// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}
//
// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    break;
  }
  return false;
}
//
// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

//
// Utility routine that checks if \p MO is defined by a
// \p CombineOpc instruction in the basic block \p MBB.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // Must only be used by the user we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \p MO defined by an integer multiply and can it be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \p MO defined by a floating-point multiply and can it be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}
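
// Shape of the MIR that canCombineWithMUL() accepts (illustrative example,
// register names hypothetical): the multiply is itself a MADD with a
// zero-register accumulator, it lives in the same block as the add/sub,
// and its result has exactly one use:
//
//   %vreg1 = MADDWrrr %vreg2, %vreg3, %WZR   ; plain 32-bit multiply
//   %vreg0 = ADDWrr %vreg4, %vreg1           ; single use of %vreg1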

// TODO: There are many more machine instruction opcodes to match:
// 1. Other data types (integer, vectors)
// 2. Other math / logic operations (xor, or)
// 3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  case AArch64::FADDDrr:
  case AArch64::FADDSrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FMULDrr:
  case AArch64::FMULSrr:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv2f64:
  case AArch64::FMULXv4f32:
  case AArch64::FMULv2f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}

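// Why associativity matters here (illustrative, assuming unsafe FP math):
// the generic machine combiner can rebalance a serial chain such as
// ((a + b) + c) + d into (a + b) + (c + d), shortening the critical path
// from three dependent FADDs to two by exposing an independent add.
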
/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // When NZCV is live, bail out.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertFlagSettingOpcode(Root);
    // When the opcode can't be changed, bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}
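
// Example of a match (illustrative MIR, register names hypothetical): for
//   %w8 = MADDWrrr %w0, %w1, %wzr   ; w8 = w0 * w1
//   %w9 = ADDWrr %w2, %w8
// the multiply feeds operand 2 of the add, so MULADDW_OP2 is recorded and
// the combiner may later fuse the pair into
//   %w9 = MADDWrrr %w0, %w1, %w2
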
/// Floating-Point Support

/// Find floating-point instructions that can be turned into a fused
/// multiply-add/subtract.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
      Found = true;
    }
    break;

  case AArch64::FSUBSrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
      Found = true;
    }
    break;
  }
  return Found;
}
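
// Example of a match (illustrative MIR, assuming unsafe FP math is
// enabled; register names hypothetical):
//   %d8 = FMULDrr %d0, %d1
//   %d9 = FADDDrr %d2, %d8
// records FMULADDD_OP2, which genAlternativeCodeSequence() may turn into
//   %d9 = FMADDDrrr %d0, %d1, %d2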

/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool
AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Patterns vector. Patterns should be sorted in priority order since
/// the pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}

enum class FMAInstKind { Default, Indexed, Accumulator };

/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
/// F|MUL I=A,B,0
/// F|ADD R,I,C
/// ==> F|MADD R,A,B,C
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();
  unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
  bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind \n");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
  InsInstrs.push_back(MIB);
  return MUL;
}

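// Operand order differs by kind (illustrative assembly): the scalar fused
// forms take the addend last, while the FMLA/FMLS vector forms tie the
// accumulator to the first source operand:
//   fmadd s0, s1, s2, s3        // Default:     s0 = s1 * s2 + s3
//   fmla  v0.2s, v1.2s, v2.2s   // Accumulator: v0 += v1 * v2
//   fmla  v0.2s, v1.2s, v2.s[0] // Indexed: lane immediate copied from MUL
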
/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
/// MUL I=A,B,0
/// ADD R,I,Imm
/// ==> ORR V, ZR, Imm
/// ==> MADD R,A,B,V
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc,
                              unsigned VR, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
                                    ResultReg)
                                .addReg(SrcReg0, getKillRegState(Src0IsKill))
                                .addReg(SrcReg1, getKillRegState(Src1IsKill))
                                .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}

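// Worked example (illustrative assembly, values hypothetical): for
// MULSUBWI_OP1 with
//   mul w8, w0, w1
//   sub w0, w8, #8
// the negated immediate is first materialized through the zero register,
// then genMaddR() folds the multiply:
//   orr  w9, wzr, #0xfffffff8   ; w9 = -8 (a valid logical immediate)
//   madd w0, w0, w1, w9         ; w0 = w0 * w1 - 8
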
/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence.
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .addOperand(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I, Imm
    // ==> ORR V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(-Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  // Floating Point Support
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDD_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULADDD_OP2:
    // FMUL I=A,B,0
    // FADD R,C,I
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBD_OP1: {
    // FMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMSUB R,A,B,C // = -C + A*B
    // --- Create(FNMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
      Opc = AArch64::FNMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULSUBD_OP2: {
    // FMUL I=A,B,0
    // FSUB R,C,I
    // ==> FMSUB R,A,B,C (computes C - A*B)
    // --- Create(FMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
      Opc = AArch64::FMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  }
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion
  DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);

  return;
}
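
// End-to-end example for MULSUBW_OP1 (illustrative assembly, register
// names hypothetical):
//   mul w8, w1, w2
//   sub w0, w8, w3
// becomes, via the extra virtual register created above,
//   sub  w9, wzr, w3       ; w9 = -w3
//   madd w0, w1, w2, w9    ; w0 = w1 * w2 - w3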

/// \brief Replace csinc-branch sequence by simple conditional branch
///
/// Examples:
/// 1.
/// csinc w9, wzr, wzr, <condition code>
/// tbnz w9, #0, 0x44
/// to
/// b.<inverted condition code>
///
/// 2.
/// csinc w9, wzr, wzr, <condition code>
/// tbz w9, #0, 0x44
/// to
/// b.<condition code>
///
/// Replace a compare-and-branch sequence by a TBZ/TBNZ instruction when
/// the compare's constant operand is a power of 2.
///
/// Examples:
/// and w8, w8, #0x400
/// cbnz w8, L1
/// to
/// tbnz w8, #10, L1
///
/// \param MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI.getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find definition.
  while (DefMI->isCopy()) {
    unsigned CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if the constant operand is a power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

    MachineOperand &MO = DefMI->getOperand(1);
    unsigned NewReg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // Register lives on to the new TBZ/TBNZ now.
    MO.setIsKill(false);

    // For immediates smaller than 32, we must use the 32-bit variant (W)
    // in all cases, because the 64-bit variant cannot encode them.
    // Therefore, if the input register is 64-bit, take its 32-bit
    // sub-register.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
  // Look for CSINC
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}
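
// Example of the AND fold (illustrative MIR, register names hypothetical):
// with a power-of-two mask,
//   %w8 = ANDWri %w9, <encoding of 0x400>
//   CBNZW %w8, %bb.1
// becomes
//   TBNZW %w9, #10, %bb.1
// For a 64-bit AND whose tested bit is below 32, the W-variant TBZ/TBNZ is
// used on the sub_32 sub-register, as handled above.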

std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"},
      {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},
      {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},
      {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT, "aarch64-got"},
      {MO_NC, "aarch64-nc"},
      {MO_TLS, "aarch64-tls"}};
  return makeArrayRef(TargetFlags);
}