//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include <algorithm>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI->getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);

  const MCInstrDesc &Desc = MI->getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    return 4;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    return 0;
  }

  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
}

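// parseCondBranch populates Cond with a target-specific encoding of the branch
// condition, consumed by ReverseBranchCondition / instantiateCondBranch below:
//   Bcc:       Cond = { condition code }
//   CBZ/CBNZ:  Cond = { -1, opcode, src register }
//   TBZ/TBNZ:  Cond = { -1, opcode, src register, bit index }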
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

// Branch analysis.
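// On success this fills in TBB (the taken successor), FBB (the fall-through
// successor of a two-way branch, or null), and Cond (encoded as described
// above parseCondBranch); returning true means the terminators could not be
// analyzed.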
bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::InsertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);
    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
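// For example, "ADDWri %vreg, 1, 0" (add w, #1) can be folded into CSINCWr,
// and "ORNWrr wzr, %vreg" (not) into CSINVWr.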
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I, DebugLoc DL,
                                    unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
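/// For example, an immediate of 0x00ff00ff is a valid logical immediate, so
/// MOVi32imm with that value can become a single "orr wN, wzr, #0x00ff00ff",
/// whereas 0x00ff00fe is not encodable that way and needs a MOVZ/MOVK pair.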
static bool canBeExpandedToORR(const MachineInstr *MI, unsigned BitSize) {
  uint64_t Imm = MI->getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
  if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53() &&
      !Subtarget.isKryo())
    return MI->isAsCheapAsAMove();

  switch (MI->getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI->getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool
AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
                                                  MachineInstr *MIb,
                                                  AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa && MIa->mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb && MIb->mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
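  // For example, "ldr x0, [x8, #8]" (offset 8, width 8) and
  // "ldr x1, [x8, #16]" cannot alias, since 8 + 8 <= 16.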
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = MI->getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: In order to convert CmpValue to 0 or 1
    CmpValue = (MI->getOperand(2).getImm() != 0);
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in OptimizeCompareInstr.
    CmpValue = (AArch64_AM::decodeLogicalImmediate(
                    MI->getOperand(2).getImm(),
                    MI->getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0);
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr *Instr) {
  MachineBasicBlock *MBB = Instr->getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr->getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr->getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertFlagSettingOpcode(const MachineInstr *MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI->definesRegister(AArch64::WZR) || MI->definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI->getOpcode()) {
  default:
    return MI->getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

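// Bit masks describing how an instruction may touch the NZCV flags; the
// encodings are disjoint so they can be combined and tested with a bitwise
// AND (AK_All matches both kinds of access).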
enum AccessKind {
  AK_Write = 0x01,
  AK_Read = 0x10,
  AK_All = 0x11
};

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineInstr *From, MachineInstr *To, const TargetRegisterInfo *TRI,
    const AccessKind AccessToCheck = AK_All) {
  // We iterate backward starting \p To until we hit \p From.
  MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin();

  // Early exit if To is at the beginning of the BB.
  if (I == B)
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(MachineBasicBlock::reverse_iterator(To),
                      To->getParent()->rend(), [From](MachineInstr &MI) {
                        return &MI == From;
                      }) != To->getParent()->rend());

  for (--I; I != E; --I) {
    const MachineInstr &Instr = *I;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare
/// instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction. Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr);
  assert(CmpInstr->getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr->definesRegister(AArch64::WZR) ||
        CmpInstr->definesRegister(AArch64::XZR)) {
      CmpInstr->eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr->getOpcode();
    unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr->setDesc(MCID);
    CmpInstr->RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare
  // function.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get the opcode of the S version of Instr.
/// If Instr is already the S version, its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:    return AArch64::ADDSWrr;
  case AArch64::ADDWri:    return AArch64::ADDSWri;
  case AArch64::ADDXrr:    return AArch64::ADDSXrr;
  case AArch64::ADDXri:    return AArch64::ADDSXri;
  case AArch64::ADCWr:     return AArch64::ADCSWr;
  case AArch64::ADCXr:     return AArch64::ADCSXr;
  case AArch64::SUBWrr:    return AArch64::SUBSWrr;
  case AArch64::SUBWri:    return AArch64::SUBSWri;
  case AArch64::SUBXrr:    return AArch64::SUBSXrr;
  case AArch64::SUBXri:    return AArch64::SUBSXri;
  case AArch64::SBCWr:     return AArch64::SBCSWr;
  case AArch64::SBCXr:     return AArch64::SBCSXr;
  case AArch64::ANDWri:    return AArch64::ANDSWri;
  case AArch64::ANDXri:    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

struct UsedNZCV {
  bool N;
  bool Z;
  bool C;
  bool V;
  UsedNZCV() : N(false), Z(false), C(false), V(false) {}
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
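    // HI and LS also test C, so fall through to set it as well.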
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
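    // GT and LE also test N and V, so fall through to set them as well.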
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr,
///        or if MI opcode is not the S form there must be neither defs of
///        flags nor uses of flags between MI and CmpInstr
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
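///
/// For example:
///   %3 = ADDWrr %1, %2
///   %4 = SUBSWri %3, 0, 0   ; compare %3 against zero, %4 unused
/// becomes:
///   %3 = ADDSWrr %1, %2
/// and the compare is erased, with later users reading NZCV from the ADDS.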
bool AArch64InstrInfo::substituteCmpToZero(MachineInstr *CmpInstr,
    unsigned SrcReg, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr);
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr->eraseFromParent();
  bool succeeded = UpdateOperandRegClass(MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

bool
AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  if (MI->getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Reg = MI->getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI->memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI->memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI->memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI->memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}

/// Return true if this instruction has a shifted register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::CRC32Brr:
  case AArch64::CRC32CBrr:
  case AArch64::CRC32CHrr:
  case AArch64::CRC32CWrr:
  case AArch64::CRC32CXrr:
  case AArch64::CRC32Hrr:
  case AArch64::CRC32Wrr:
  case AArch64::CRC32Xrr:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI->getOperand(3).isImm()) {
      unsigned val = MI->getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction has an extended register operand with a
/// non-zero extend/shift amount.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI->getOperand(3).isImm()) {
      unsigned val = MI->getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
      assert(MI->getDesc().getNumOperands() == 3 &&
             MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI->getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI->getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI->getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI->getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI->getOperand(1).getReg() == AArch64::XZR) {
      assert(MI->getDesc().getNumOperands() == 4 &&
             MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI->getOperand(2).getImm() == 0) {
      assert(MI->getDesc().getNumOperands() == 4 &&
             MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI->getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
      assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                               int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                              int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    unsigned Val = MI->getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  return false;
}

/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const {
  assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
         "Too many target MO flags");
  for (auto *MM : MI->memoperands()) {
    if (MM->getFlags() &
        (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) {
      return true;
    }
  }
  return false;
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
  if (MI->memoperands_empty())
    return;

  assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
         "Too many target MO flags");
  (*MI->memoperands_begin())
      ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit);
}

bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}

bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr *MI) const {
  return isUnscaledLdSt(MI->getOpcode());
}

// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr *MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI->hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm (as opposed to an address reloc).
  assert(MI->getOperand(1).isReg() && "Expected a reg operand.");
  if (!MI->getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  unsigned BaseReg = MI->getOperand(1).getReg();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI->modifiesRegister(BaseReg, TRI))
    return false;

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // Do not pair quad ld/st for Exynos.
  if (Subtarget.isExynosM1()) {
    switch (MI->getOpcode()) {
    default:
      break;

    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}

Chad Rosierc27a18f2016-03-09 16:00:35 +00001511bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
1512 MachineInstr *LdSt, unsigned &BaseReg, int64_t &Offset,
1513 const TargetRegisterInfo *TRI) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00001514 switch (LdSt->getOpcode()) {
1515 default:
1516 return false;
Chad Rosier0da267d2016-03-09 16:46:48 +00001517 // Scaled instructions.
Tim Northover3b0846e2014-05-24 12:50:23 +00001518 case AArch64::STRSui:
1519 case AArch64::STRDui:
1520 case AArch64::STRQui:
1521 case AArch64::STRXui:
1522 case AArch64::STRWui:
1523 case AArch64::LDRSui:
1524 case AArch64::LDRDui:
1525 case AArch64::LDRQui:
1526 case AArch64::LDRXui:
1527 case AArch64::LDRWui:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001528 case AArch64::LDRSWui:
1529 // Unscaled instructions.
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001530 case AArch64::STURSi:
1531 case AArch64::STURDi:
1532 case AArch64::STURQi:
1533 case AArch64::STURXi:
1534 case AArch64::STURWi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001535 case AArch64::LDURSi:
1536 case AArch64::LDURDi:
1537 case AArch64::LDURQi:
1538 case AArch64::LDURWi:
1539 case AArch64::LDURXi:
1540 case AArch64::LDURSWi:
Chad Rosier0da267d2016-03-09 16:46:48 +00001541 unsigned Width;
1542 return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
Tim Northover3b0846e2014-05-24 12:50:23 +00001543  }
1544}
1545
Sanjoy Dasb666ea32015-06-15 18:44:14 +00001546bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
Chad Rosier0da267d2016-03-09 16:46:48 +00001547 MachineInstr *LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
Chad Rosier3528c1e2014-09-08 14:43:48 +00001548 const TargetRegisterInfo *TRI) const {
Chad Rosiercf173ff2016-03-21 18:04:10 +00001549 assert(LdSt->mayLoadOrStore() && "Expected a memory operation.");
Chad Rosier3528c1e2014-09-08 14:43:48 +00001550 // Handle only loads/stores with base register followed by immediate offset.
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001551 if (LdSt->getNumExplicitOperands() == 3) {
1552 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1553 if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
1554 return false;
1555 } else if (LdSt->getNumExplicitOperands() == 4) {
1556 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
 1557    if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isReg() ||
         !LdSt->getOperand(3).isImm())
1558 return false;
1559 } else
Chad Rosier3528c1e2014-09-08 14:43:48 +00001560 return false;
1561
 1562  // The offset is calculated as the immediate operand multiplied by the
 1563  // scaling factor; unscaled instructions have a scaling factor of 1.
Chad Rosier0da267d2016-03-09 16:46:48 +00001564 unsigned Scale = 0;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001565 switch (LdSt->getOpcode()) {
1566 default:
1567 return false;
1568 case AArch64::LDURQi:
1569 case AArch64::STURQi:
1570 Width = 16;
1571 Scale = 1;
1572 break;
1573 case AArch64::LDURXi:
1574 case AArch64::LDURDi:
1575 case AArch64::STURXi:
1576 case AArch64::STURDi:
1577 Width = 8;
1578 Scale = 1;
1579 break;
1580 case AArch64::LDURWi:
1581 case AArch64::LDURSi:
1582 case AArch64::LDURSWi:
1583 case AArch64::STURWi:
1584 case AArch64::STURSi:
1585 Width = 4;
1586 Scale = 1;
1587 break;
1588 case AArch64::LDURHi:
1589 case AArch64::LDURHHi:
1590 case AArch64::LDURSHXi:
1591 case AArch64::LDURSHWi:
1592 case AArch64::STURHi:
1593 case AArch64::STURHHi:
1594 Width = 2;
1595 Scale = 1;
1596 break;
1597 case AArch64::LDURBi:
1598 case AArch64::LDURBBi:
1599 case AArch64::LDURSBXi:
1600 case AArch64::LDURSBWi:
1601 case AArch64::STURBi:
1602 case AArch64::STURBBi:
1603 Width = 1;
1604 Scale = 1;
1605 break;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001606 case AArch64::LDPQi:
1607 case AArch64::LDNPQi:
1608 case AArch64::STPQi:
1609 case AArch64::STNPQi:
1610 Scale = 16;
1611 Width = 32;
1612 break;
Chad Rosierd90e2eb2015-09-18 14:15:19 +00001613 case AArch64::LDRQui:
1614 case AArch64::STRQui:
1615 Scale = Width = 16;
1616 break;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001617 case AArch64::LDPXi:
1618 case AArch64::LDPDi:
1619 case AArch64::LDNPXi:
1620 case AArch64::LDNPDi:
1621 case AArch64::STPXi:
1622 case AArch64::STPDi:
1623 case AArch64::STNPXi:
1624 case AArch64::STNPDi:
1625 Scale = 8;
1626 Width = 16;
1627 break;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001628 case AArch64::LDRXui:
Chad Rosier84a0afd2015-09-18 14:13:18 +00001629 case AArch64::LDRDui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00001630 case AArch64::STRXui:
Chad Rosier84a0afd2015-09-18 14:13:18 +00001631 case AArch64::STRDui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00001632 Scale = Width = 8;
1633 break;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001634 case AArch64::LDPWi:
1635 case AArch64::LDPSi:
1636 case AArch64::LDNPWi:
1637 case AArch64::LDNPSi:
1638 case AArch64::STPWi:
1639 case AArch64::STPSi:
1640 case AArch64::STNPWi:
1641 case AArch64::STNPSi:
1642 Scale = 4;
1643 Width = 8;
1644 break;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001645 case AArch64::LDRWui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00001646 case AArch64::LDRSui:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001647 case AArch64::LDRSWui:
Chad Rosier84a0afd2015-09-18 14:13:18 +00001648 case AArch64::STRWui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00001649 case AArch64::STRSui:
1650 Scale = Width = 4;
1651 break;
Chad Rosier84a0afd2015-09-18 14:13:18 +00001652 case AArch64::LDRHui:
1653 case AArch64::LDRHHui:
1654 case AArch64::STRHui:
1655 case AArch64::STRHHui:
1656 Scale = Width = 2;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001657 break;
Chad Rosierd90e2eb2015-09-18 14:15:19 +00001658 case AArch64::LDRBui:
1659 case AArch64::LDRBBui:
1660 case AArch64::STRBui:
1661 case AArch64::STRBBui:
1662 Scale = Width = 1;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001663 break;
Chad Rosier064261d2016-02-01 20:54:36 +00001664 }
Chad Rosier3528c1e2014-09-08 14:43:48 +00001665
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001666 if (LdSt->getNumExplicitOperands() == 3) {
1667 BaseReg = LdSt->getOperand(1).getReg();
1668 Offset = LdSt->getOperand(2).getImm() * Scale;
1669 } else {
1670 assert(LdSt->getNumExplicitOperands() == 4 && "invalid number of operands");
1671 BaseReg = LdSt->getOperand(2).getReg();
1672 Offset = LdSt->getOperand(3).getImm() * Scale;
1673 }
Chad Rosier3528c1e2014-09-08 14:43:48 +00001674 return true;
1675}
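
// Worked example (hypothetical operands): for "ldp x1, x2, [x0, #16]"
// (LDPXi: Scale = 8, Width = 16) the immediate operand holds 2, so this
// returns BaseReg = x0 and Offset = 2 * 8 = 16 bytes, with Width covering
// both 8-byte registers.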
1676
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001677// Scale the unscaled offsets. Returns false if the unscaled offset can't be
1678// scaled.
1679static bool scaleOffset(unsigned Opc, int64_t &Offset) {
1680 unsigned OffsetStride = 1;
1681 switch (Opc) {
1682 default:
1683 return false;
1684 case AArch64::LDURQi:
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001685 case AArch64::STURQi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001686 OffsetStride = 16;
1687 break;
1688 case AArch64::LDURXi:
1689 case AArch64::LDURDi:
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001690 case AArch64::STURXi:
1691 case AArch64::STURDi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001692 OffsetStride = 8;
1693 break;
1694 case AArch64::LDURWi:
1695 case AArch64::LDURSi:
1696 case AArch64::LDURSWi:
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001697 case AArch64::STURWi:
1698 case AArch64::STURSi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001699 OffsetStride = 4;
1700 break;
1701 }
1702 // If the byte-offset isn't a multiple of the stride, we can't scale this
1703 // offset.
1704 if (Offset % OffsetStride != 0)
1705 return false;
1706
1707 // Convert the byte-offset used by unscaled into an "element" offset used
1708 // by the scaled pair load/store instructions.
1709 Offset /= OffsetStride;
1710 return true;
1711}
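
// Worked example (illustrative): an unscaled LDURXi with byte offset 16 has
// OffsetStride = 8, so the scaled "element" offset becomes 16 / 8 = 2; a byte
// offset of 12 would fail the stride check and pairing is abandoned.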
1712
1713static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
1714 if (FirstOpc == SecondOpc)
1715 return true;
1716 // We can also pair sign-ext and zero-ext instructions.
1717 switch (FirstOpc) {
1718 default:
1719 return false;
1720 case AArch64::LDRWui:
1721 case AArch64::LDURWi:
1722 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
1723 case AArch64::LDRSWui:
1724 case AArch64::LDURSWi:
1725 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
1726 }
1727 // These instructions can't be paired based on their opcodes.
1728 return false;
1729}
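
// e.g., "ldr w1, [x0]" (LDRWui) and "ldrsw x2, [x0, #4]" (LDRSWui) are
// opcode-compatible here; reconciling the differing extensions when the pair
// is actually built is left to the load/store optimizer.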
1730
Tim Northover3b0846e2014-05-24 12:50:23 +00001731/// Detect opportunities for ldp/stp formation.
1732///
Sanjoy Dasb666ea32015-06-15 18:44:14 +00001733/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001734bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr *FirstLdSt,
1735 MachineInstr *SecondLdSt,
1736 unsigned NumLoads) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00001737 // Only cluster up to a single pair.
1738 if (NumLoads > 1)
1739 return false;
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001740
1741 // Can we pair these instructions based on their opcodes?
1742 unsigned FirstOpc = FirstLdSt->getOpcode();
1743 unsigned SecondOpc = SecondLdSt->getOpcode();
1744 if (!canPairLdStOpc(FirstOpc, SecondOpc))
Tim Northover3b0846e2014-05-24 12:50:23 +00001745 return false;
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001746
1747 // Can't merge volatiles or load/stores that have a hint to avoid pair
1748 // formation, for example.
1749 if (!isCandidateToMergeOrPair(FirstLdSt) ||
1750 !isCandidateToMergeOrPair(SecondLdSt))
Tim Northover3b0846e2014-05-24 12:50:23 +00001751 return false;
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001752
1753 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
1754 int64_t Offset1 = FirstLdSt->getOperand(2).getImm();
1755 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
1756 return false;
1757
1758 int64_t Offset2 = SecondLdSt->getOperand(2).getImm();
1759 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
1760 return false;
1761
1762 // Pairwise instructions have a 7-bit signed offset field.
1763 if (Offset1 > 63 || Offset1 < -64)
1764 return false;
1765
Tim Northover3b0846e2014-05-24 12:50:23 +00001766 // The caller should already have ordered First/SecondLdSt by offset.
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001767 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
1768 return Offset1 + 1 == Offset2;
Tim Northover3b0846e2014-05-24 12:50:23 +00001769}
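
// Worked example (hypothetical registers): FirstLdSt = "ldur x1, [x0, #8]"
// (LDURXi) and SecondLdSt = "ldr x2, [x0, #16]" (LDRXui). scaleOffset turns
// the byte offset 8 into 8 / 8 = 1, the second offset is already the element
// offset 2, 1 + 1 == 2 and 1 fits the 7-bit signed range, so the two loads
// are clustered.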
1770
1771bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
1772 MachineInstr *Second) const {
Matthias Braunc8b67e62015-07-20 23:11:42 +00001773 if (Subtarget.isCyclone()) {
1774 // Cyclone can fuse CMN, CMP, TST followed by Bcc.
1775 unsigned SecondOpcode = Second->getOpcode();
1776 if (SecondOpcode == AArch64::Bcc) {
1777 switch (First->getOpcode()) {
1778 default:
1779 return false;
1780 case AArch64::SUBSWri:
1781 case AArch64::ADDSWri:
1782 case AArch64::ANDSWri:
1783 case AArch64::SUBSXri:
1784 case AArch64::ADDSXri:
1785 case AArch64::ANDSXri:
1786 return true;
1787 }
Matthias Braune536f4f2015-07-20 22:34:47 +00001788 }
Matthias Braunc8b67e62015-07-20 23:11:42 +00001789 // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ.
1790 if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
1791 SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
1792 switch (First->getOpcode()) {
1793 default:
1794 return false;
1795 case AArch64::ADDWri:
1796 case AArch64::ADDXri:
1797 case AArch64::ANDWri:
1798 case AArch64::ANDXri:
1799 case AArch64::EORWri:
1800 case AArch64::EORXri:
1801 case AArch64::ORRWri:
1802 case AArch64::ORRXri:
1803 case AArch64::SUBWri:
1804 case AArch64::SUBXri:
1805 return true;
1806 }
Matthias Braune536f4f2015-07-20 22:34:47 +00001807 }
1808 }
1809 return false;
Tim Northover3b0846e2014-05-24 12:50:23 +00001810}
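
// e.g., on Cyclone the sequence
//   subs w8, w8, #1   (SUBSWri)
//   b.ne .LBB0_1      (Bcc)
// is kept back-to-back so the core can macro-fuse the flag-setting ALU
// operation with the conditional branch.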
1811
Adrian Prantl87b7eb92014-10-01 18:55:02 +00001812MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
1813 MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
1814 const MDNode *Expr, DebugLoc DL) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00001815 MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
1816 .addFrameIndex(FrameIx)
1817 .addImm(0)
1818 .addImm(Offset)
Adrian Prantl87b7eb92014-10-01 18:55:02 +00001819 .addMetadata(Var)
1820 .addMetadata(Expr);
Tim Northover3b0846e2014-05-24 12:50:23 +00001821 return &*MIB;
1822}
1823
1824static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
1825 unsigned Reg, unsigned SubIdx,
1826 unsigned State,
1827 const TargetRegisterInfo *TRI) {
1828 if (!SubIdx)
1829 return MIB.addReg(Reg, State);
1830
1831 if (TargetRegisterInfo::isPhysicalRegister(Reg))
1832 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1833 return MIB.addReg(Reg, State, SubIdx);
1834}
1835
1836static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
1837 unsigned NumRegs) {
 1838  // We really want the positive remainder mod 32 here, which happens to be
1839 // easily obtainable with a mask.
1840 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
1841}
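
// Worked example (illustrative encodings): copying the triple Q1_Q2_Q3 to
// Q2_Q3_Q4 gives (2 - 1) & 0x1f == 1 < 3, so a forward sub-register copy
// would overwrite Q2/Q3 before reading them and the copy must run backwards.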
1842
1843void AArch64InstrInfo::copyPhysRegTuple(
1844 MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
1845 unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
1846 llvm::ArrayRef<unsigned> Indices) const {
Eric Christopher58f32662014-06-10 22:57:21 +00001847 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00001848 "Unexpected register copy without NEON");
Eric Christophera0de2532015-03-18 20:37:30 +00001849 const TargetRegisterInfo *TRI = &getRegisterInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00001850 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
1851 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
1852 unsigned NumRegs = Indices.size();
1853
1854 int SubReg = 0, End = NumRegs, Incr = 1;
1855 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
1856 SubReg = NumRegs - 1;
1857 End = -1;
1858 Incr = -1;
1859 }
1860
1861 for (; SubReg != End; SubReg += Incr) {
James Molloyf8aa57a2015-04-16 11:37:40 +00001862 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
Tim Northover3b0846e2014-05-24 12:50:23 +00001863 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
1864 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
1865 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
1866 }
1867}
1868
1869void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
1870 MachineBasicBlock::iterator I, DebugLoc DL,
1871 unsigned DestReg, unsigned SrcReg,
1872 bool KillSrc) const {
1873 if (AArch64::GPR32spRegClass.contains(DestReg) &&
1874 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
Eric Christophera0de2532015-03-18 20:37:30 +00001875 const TargetRegisterInfo *TRI = &getRegisterInfo();
1876
Tim Northover3b0846e2014-05-24 12:50:23 +00001877 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
1878 // If either operand is WSP, expand to ADD #0.
1879 if (Subtarget.hasZeroCycleRegMove()) {
1880 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
1881 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1882 &AArch64::GPR64spRegClass);
1883 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1884 &AArch64::GPR64spRegClass);
1885 // This instruction is reading and writing X registers. This may upset
1886 // the register scavenger and machine verifier, so we need to indicate
1887 // that we are reading an undefined value from SrcRegX, but a proper
1888 // value from SrcReg.
1889 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
1890 .addReg(SrcRegX, RegState::Undef)
1891 .addImm(0)
1892 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
1893 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1894 } else {
1895 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
1896 .addReg(SrcReg, getKillRegState(KillSrc))
1897 .addImm(0)
1898 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1899 }
1900 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
1901 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
1902 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1903 } else {
1904 if (Subtarget.hasZeroCycleRegMove()) {
1905 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
1906 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1907 &AArch64::GPR64spRegClass);
1908 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1909 &AArch64::GPR64spRegClass);
1910 // This instruction is reading and writing X registers. This may upset
1911 // the register scavenger and machine verifier, so we need to indicate
1912 // that we are reading an undefined value from SrcRegX, but a proper
1913 // value from SrcReg.
1914 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
1915 .addReg(AArch64::XZR)
1916 .addReg(SrcRegX, RegState::Undef)
1917 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1918 } else {
1919 // Otherwise, expand to ORR WZR.
1920 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
1921 .addReg(AArch64::WZR)
1922 .addReg(SrcReg, getKillRegState(KillSrc));
1923 }
1924 }
1925 return;
1926 }
1927
1928 if (AArch64::GPR64spRegClass.contains(DestReg) &&
1929 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
1930 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
1931 // If either operand is SP, expand to ADD #0.
1932 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
1933 .addReg(SrcReg, getKillRegState(KillSrc))
1934 .addImm(0)
1935 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1936 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
1937 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
1938 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1939 } else {
1940 // Otherwise, expand to ORR XZR.
1941 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
1942 .addReg(AArch64::XZR)
1943 .addReg(SrcReg, getKillRegState(KillSrc));
1944 }
1945 return;
1946 }
1947
1948 // Copy a DDDD register quad by copying the individual sub-registers.
1949 if (AArch64::DDDDRegClass.contains(DestReg) &&
1950 AArch64::DDDDRegClass.contains(SrcReg)) {
1951 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
1952 AArch64::dsub2, AArch64::dsub3 };
1953 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1954 Indices);
1955 return;
1956 }
1957
1958 // Copy a DDD register triple by copying the individual sub-registers.
1959 if (AArch64::DDDRegClass.contains(DestReg) &&
1960 AArch64::DDDRegClass.contains(SrcReg)) {
1961 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
1962 AArch64::dsub2 };
1963 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1964 Indices);
1965 return;
1966 }
1967
1968 // Copy a DD register pair by copying the individual sub-registers.
1969 if (AArch64::DDRegClass.contains(DestReg) &&
1970 AArch64::DDRegClass.contains(SrcReg)) {
1971 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
1972 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1973 Indices);
1974 return;
1975 }
1976
1977 // Copy a QQQQ register quad by copying the individual sub-registers.
1978 if (AArch64::QQQQRegClass.contains(DestReg) &&
1979 AArch64::QQQQRegClass.contains(SrcReg)) {
1980 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
1981 AArch64::qsub2, AArch64::qsub3 };
1982 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
1983 Indices);
1984 return;
1985 }
1986
1987 // Copy a QQQ register triple by copying the individual sub-registers.
1988 if (AArch64::QQQRegClass.contains(DestReg) &&
1989 AArch64::QQQRegClass.contains(SrcReg)) {
1990 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
1991 AArch64::qsub2 };
1992 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
1993 Indices);
1994 return;
1995 }
1996
1997 // Copy a QQ register pair by copying the individual sub-registers.
1998 if (AArch64::QQRegClass.contains(DestReg) &&
1999 AArch64::QQRegClass.contains(SrcReg)) {
2000 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
2001 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2002 Indices);
2003 return;
2004 }
2005
2006 if (AArch64::FPR128RegClass.contains(DestReg) &&
2007 AArch64::FPR128RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002008    if (Subtarget.hasNEON()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00002009 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2010 .addReg(SrcReg)
2011 .addReg(SrcReg, getKillRegState(KillSrc));
2012 } else {
2013 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2014 .addReg(AArch64::SP, RegState::Define)
2015 .addReg(SrcReg, getKillRegState(KillSrc))
2016 .addReg(AArch64::SP)
2017 .addImm(-16);
2018 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2019 .addReg(AArch64::SP, RegState::Define)
2020 .addReg(DestReg, RegState::Define)
2021 .addReg(AArch64::SP)
2022 .addImm(16);
2023 }
2024 return;
2025 }
2026
2027 if (AArch64::FPR64RegClass.contains(DestReg) &&
2028 AArch64::FPR64RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002029    if (Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002030 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2031 &AArch64::FPR128RegClass);
2032 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2033 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002034 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2035 .addReg(SrcReg)
2036 .addReg(SrcReg, getKillRegState(KillSrc));
2037 } else {
2038 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2039 .addReg(SrcReg, getKillRegState(KillSrc));
2040 }
2041 return;
2042 }
2043
2044 if (AArch64::FPR32RegClass.contains(DestReg) &&
2045 AArch64::FPR32RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002046    if (Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002047 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2048 &AArch64::FPR128RegClass);
2049 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2050 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002051 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2052 .addReg(SrcReg)
2053 .addReg(SrcReg, getKillRegState(KillSrc));
2054 } else {
2055 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2056 .addReg(SrcReg, getKillRegState(KillSrc));
2057 }
2058 return;
2059 }
2060
2061 if (AArch64::FPR16RegClass.contains(DestReg) &&
2062 AArch64::FPR16RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002063    if (Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002064 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2065 &AArch64::FPR128RegClass);
2066 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2067 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002068 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2069 .addReg(SrcReg)
2070 .addReg(SrcReg, getKillRegState(KillSrc));
2071 } else {
Eric Christophera0de2532015-03-18 20:37:30 +00002072 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2073 &AArch64::FPR32RegClass);
2074 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2075 &AArch64::FPR32RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002076 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2077 .addReg(SrcReg, getKillRegState(KillSrc));
2078 }
2079 return;
2080 }
2081
2082 if (AArch64::FPR8RegClass.contains(DestReg) &&
2083 AArch64::FPR8RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002084    if (Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002085 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
Tim Northover3b0846e2014-05-24 12:50:23 +00002086 &AArch64::FPR128RegClass);
Eric Christophera0de2532015-03-18 20:37:30 +00002087 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2088 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002089 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2090 .addReg(SrcReg)
2091 .addReg(SrcReg, getKillRegState(KillSrc));
2092 } else {
Eric Christophera0de2532015-03-18 20:37:30 +00002093 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2094 &AArch64::FPR32RegClass);
2095 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2096 &AArch64::FPR32RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002097 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2098 .addReg(SrcReg, getKillRegState(KillSrc));
2099 }
2100 return;
2101 }
2102
2103 // Copies between GPR64 and FPR64.
2104 if (AArch64::FPR64RegClass.contains(DestReg) &&
2105 AArch64::GPR64RegClass.contains(SrcReg)) {
2106 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2107 .addReg(SrcReg, getKillRegState(KillSrc));
2108 return;
2109 }
2110 if (AArch64::GPR64RegClass.contains(DestReg) &&
2111 AArch64::FPR64RegClass.contains(SrcReg)) {
2112 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2113 .addReg(SrcReg, getKillRegState(KillSrc));
2114 return;
2115 }
2116 // Copies between GPR32 and FPR32.
2117 if (AArch64::FPR32RegClass.contains(DestReg) &&
2118 AArch64::GPR32RegClass.contains(SrcReg)) {
2119 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2120 .addReg(SrcReg, getKillRegState(KillSrc));
2121 return;
2122 }
2123 if (AArch64::GPR32RegClass.contains(DestReg) &&
2124 AArch64::FPR32RegClass.contains(SrcReg)) {
2125 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2126 .addReg(SrcReg, getKillRegState(KillSrc));
2127 return;
2128 }
2129
Tim Northover1bed9af2014-05-27 12:16:02 +00002130 if (DestReg == AArch64::NZCV) {
2131 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2132 BuildMI(MBB, I, DL, get(AArch64::MSR))
2133 .addImm(AArch64SysReg::NZCV)
2134 .addReg(SrcReg, getKillRegState(KillSrc))
2135 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2136 return;
2137 }
2138
2139 if (SrcReg == AArch64::NZCV) {
2140 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
Quentin Colombet658d9db2016-04-22 18:46:17 +00002141 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
Tim Northover1bed9af2014-05-27 12:16:02 +00002142 .addImm(AArch64SysReg::NZCV)
2143 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2144 return;
2145 }
2146
2147 llvm_unreachable("unimplemented reg-to-reg copy");
Tim Northover3b0846e2014-05-24 12:50:23 +00002148}
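
// Illustrative results (hypothetical registers): a W1 <- W2 copy becomes
// "orr x1, xzr, x2" on cores with zero-cycle register moves (writing the
// full X register), and plain "orr w1, wzr, w2" elsewhere; with NEON an
// FPR32 copy is emitted as an ORRv16i8 on the containing Q registers.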
2149
2150void AArch64InstrInfo::storeRegToStackSlot(
2151 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2152 bool isKill, int FI, const TargetRegisterClass *RC,
2153 const TargetRegisterInfo *TRI) const {
2154 DebugLoc DL;
2155 if (MBBI != MBB.end())
2156 DL = MBBI->getDebugLoc();
2157 MachineFunction &MF = *MBB.getParent();
2158 MachineFrameInfo &MFI = *MF.getFrameInfo();
2159 unsigned Align = MFI.getObjectAlignment(FI);
2160
Alex Lorenze40c8a22015-08-11 23:09:45 +00002161 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
Tim Northover3b0846e2014-05-24 12:50:23 +00002162 MachineMemOperand *MMO = MF.getMachineMemOperand(
2163 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2164 unsigned Opc = 0;
2165 bool Offset = true;
2166 switch (RC->getSize()) {
2167 case 1:
2168 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2169 Opc = AArch64::STRBui;
2170 break;
2171 case 2:
2172 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2173 Opc = AArch64::STRHui;
2174 break;
2175 case 4:
2176 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2177 Opc = AArch64::STRWui;
2178 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2179 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2180 else
2181 assert(SrcReg != AArch64::WSP);
2182 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2183 Opc = AArch64::STRSui;
2184 break;
2185 case 8:
2186 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2187 Opc = AArch64::STRXui;
2188 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2189 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2190 else
2191 assert(SrcReg != AArch64::SP);
2192 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2193 Opc = AArch64::STRDui;
2194 break;
2195 case 16:
2196 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2197 Opc = AArch64::STRQui;
2198 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002199 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002200 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002201 Opc = AArch64::ST1Twov1d;
2202 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002203 }
2204 break;
2205 case 24:
2206 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002207 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002208 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002209 Opc = AArch64::ST1Threev1d;
2210 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002211 }
2212 break;
2213 case 32:
2214 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002215 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002216 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002217 Opc = AArch64::ST1Fourv1d;
2218 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002219 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002220 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002221 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002222 Opc = AArch64::ST1Twov2d;
2223 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002224 }
2225 break;
2226 case 48:
2227 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002228 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002229 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002230 Opc = AArch64::ST1Threev2d;
2231 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002232 }
2233 break;
2234 case 64:
2235 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002236 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002237 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002238 Opc = AArch64::ST1Fourv2d;
2239 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002240 }
2241 break;
2242 }
2243 assert(Opc && "Unknown register class");
2244
James Molloyf8aa57a2015-04-16 11:37:40 +00002245 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
Tim Northover3b0846e2014-05-24 12:50:23 +00002246 .addReg(SrcReg, getKillRegState(isKill))
2247 .addFrameIndex(FI);
2248
2249 if (Offset)
2250 MI.addImm(0);
2251 MI.addMemOperand(MMO);
2252}
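
// Illustrative spill (hypothetical frame index): an 8-byte GPR64 spill emits
// "STRXui %x19, <fi#1>, 0" plus a MachineMemOperand for the 8-byte store,
// while a 32-byte QQ tuple falls back to ST1Twov2d, which takes no immediate
// offset (hence Offset == false above).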
2253
2254void AArch64InstrInfo::loadRegFromStackSlot(
2255 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2256 int FI, const TargetRegisterClass *RC,
2257 const TargetRegisterInfo *TRI) const {
2258 DebugLoc DL;
2259 if (MBBI != MBB.end())
2260 DL = MBBI->getDebugLoc();
2261 MachineFunction &MF = *MBB.getParent();
2262 MachineFrameInfo &MFI = *MF.getFrameInfo();
2263 unsigned Align = MFI.getObjectAlignment(FI);
Alex Lorenze40c8a22015-08-11 23:09:45 +00002264 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
Tim Northover3b0846e2014-05-24 12:50:23 +00002265 MachineMemOperand *MMO = MF.getMachineMemOperand(
2266 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2267
2268 unsigned Opc = 0;
2269 bool Offset = true;
2270 switch (RC->getSize()) {
2271 case 1:
2272 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2273 Opc = AArch64::LDRBui;
2274 break;
2275 case 2:
2276 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2277 Opc = AArch64::LDRHui;
2278 break;
2279 case 4:
2280 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2281 Opc = AArch64::LDRWui;
2282 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2283 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2284 else
2285 assert(DestReg != AArch64::WSP);
2286 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2287 Opc = AArch64::LDRSui;
2288 break;
2289 case 8:
2290 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2291 Opc = AArch64::LDRXui;
2292 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2293 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2294 else
2295 assert(DestReg != AArch64::SP);
2296 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2297 Opc = AArch64::LDRDui;
2298 break;
2299 case 16:
2300 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2301 Opc = AArch64::LDRQui;
2302 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002303 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002304 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002305 Opc = AArch64::LD1Twov1d;
2306 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002307 }
2308 break;
2309 case 24:
2310 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002311 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002312 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002313 Opc = AArch64::LD1Threev1d;
2314 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002315 }
2316 break;
2317 case 32:
2318 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002319 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002320 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002321 Opc = AArch64::LD1Fourv1d;
2322 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002323 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002324 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002325 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002326 Opc = AArch64::LD1Twov2d;
2327 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002328 }
2329 break;
2330 case 48:
2331 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002332 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002333 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002334 Opc = AArch64::LD1Threev2d;
2335 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002336 }
2337 break;
2338 case 64:
2339 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002340 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002341 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002342 Opc = AArch64::LD1Fourv2d;
2343 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002344 }
2345 break;
2346 }
2347 assert(Opc && "Unknown register class");
2348
James Molloyf8aa57a2015-04-16 11:37:40 +00002349 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
Tim Northover3b0846e2014-05-24 12:50:23 +00002350 .addReg(DestReg, getDefRegState(true))
2351 .addFrameIndex(FI);
2352 if (Offset)
2353 MI.addImm(0);
2354 MI.addMemOperand(MMO);
2355}
2356
2357void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2358 MachineBasicBlock::iterator MBBI, DebugLoc DL,
2359 unsigned DestReg, unsigned SrcReg, int Offset,
Eric Christopherbc76b972014-06-10 17:33:39 +00002360 const TargetInstrInfo *TII,
Tim Northover3b0846e2014-05-24 12:50:23 +00002361 MachineInstr::MIFlag Flag, bool SetNZCV) {
2362 if (DestReg == SrcReg && Offset == 0)
2363 return;
2364
2365 bool isSub = Offset < 0;
2366 if (isSub)
2367 Offset = -Offset;
2368
2369 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2370 // scratch register. If DestReg is a virtual register, use it as the
2371 // scratch register; otherwise, create a new virtual register (to be
2372 // replaced by the scavenger at the end of PEI). That case can be optimized
2373 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2374 // register can be loaded with offset%8 and the add/sub can use an extending
2375 // instruction with LSL#3.
2376 // Currently the function handles any offsets but generates a poor sequence
2377 // of code.
2378 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2379
2380 unsigned Opc;
2381 if (SetNZCV)
2382 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2383 else
2384 Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2385 const unsigned MaxEncoding = 0xfff;
2386 const unsigned ShiftSize = 12;
2387 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2388 while (((unsigned)Offset) >= (1 << ShiftSize)) {
2389 unsigned ThisVal;
2390 if (((unsigned)Offset) > MaxEncodableValue) {
2391 ThisVal = MaxEncodableValue;
2392 } else {
2393 ThisVal = Offset & MaxEncodableValue;
2394 }
2395 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2396 "Encoding cannot handle value that big");
2397 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2398 .addReg(SrcReg)
2399 .addImm(ThisVal >> ShiftSize)
2400 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2401 .setMIFlag(Flag);
2402
2403 SrcReg = DestReg;
2404 Offset -= ThisVal;
2405 if (Offset == 0)
2406 return;
2407 }
2408 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2409 .addReg(SrcReg)
2410 .addImm(Offset)
2411 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2412 .setMIFlag(Flag);
2413}
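
// Worked example (hypothetical offset): Offset = 0x1234567 is emitted as
//   add xd, xn, #0xfff, lsl #12
//   add xd, xd, #0x235, lsl #12
//   add xd, xd, #0x567
// consuming at most MaxEncodableValue (0xfff000) per loop iteration.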
2414
Keno Fischere70b31f2015-06-08 20:09:58 +00002415MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
2416 MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
2417 MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00002418 // This is a bit of a hack. Consider this instruction:
2419 //
2420 // %vreg0<def> = COPY %SP; GPR64all:%vreg0
2421 //
2422 // We explicitly chose GPR64all for the virtual register so such a copy might
2423 // be eliminated by RegisterCoalescer. However, that may not be possible, and
2424 // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
2425 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2426 //
2427 // To prevent that, we are going to constrain the %vreg0 register class here.
2428 //
2429 // <rdar://problem/11522048>
2430 //
2431 if (MI->isCopy()) {
2432 unsigned DstReg = MI->getOperand(0).getReg();
2433 unsigned SrcReg = MI->getOperand(1).getReg();
2434 if (SrcReg == AArch64::SP &&
2435 TargetRegisterInfo::isVirtualRegister(DstReg)) {
2436 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2437 return nullptr;
2438 }
2439 if (DstReg == AArch64::SP &&
2440 TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2441 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2442 return nullptr;
2443 }
2444 }
2445
2446 // Cannot fold.
2447 return nullptr;
2448}
2449
2450int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
2451 bool *OutUseUnscaledOp,
2452 unsigned *OutUnscaledOp,
2453 int *EmittableOffset) {
2454 int Scale = 1;
2455 bool IsSigned = false;
 2456  // ImmIdx is updated below for instructions whose immediate is not operand 2.
2457 unsigned ImmIdx = 2;
2458 unsigned UnscaledOp = 0;
2459 // Set output values in case of early exit.
2460 if (EmittableOffset)
2461 *EmittableOffset = 0;
2462 if (OutUseUnscaledOp)
2463 *OutUseUnscaledOp = false;
2464 if (OutUnscaledOp)
2465 *OutUnscaledOp = 0;
2466 switch (MI.getOpcode()) {
2467 default:
Craig Topper2a30d782014-06-18 05:05:13 +00002468 llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
Tim Northover3b0846e2014-05-24 12:50:23 +00002469 // Vector spills/fills can't take an immediate offset.
2470 case AArch64::LD1Twov2d:
2471 case AArch64::LD1Threev2d:
2472 case AArch64::LD1Fourv2d:
2473 case AArch64::LD1Twov1d:
2474 case AArch64::LD1Threev1d:
2475 case AArch64::LD1Fourv1d:
2476 case AArch64::ST1Twov2d:
2477 case AArch64::ST1Threev2d:
2478 case AArch64::ST1Fourv2d:
2479 case AArch64::ST1Twov1d:
2480 case AArch64::ST1Threev1d:
2481 case AArch64::ST1Fourv1d:
2482 return AArch64FrameOffsetCannotUpdate;
2483 case AArch64::PRFMui:
2484 Scale = 8;
2485 UnscaledOp = AArch64::PRFUMi;
2486 break;
2487 case AArch64::LDRXui:
2488 Scale = 8;
2489 UnscaledOp = AArch64::LDURXi;
2490 break;
2491 case AArch64::LDRWui:
2492 Scale = 4;
2493 UnscaledOp = AArch64::LDURWi;
2494 break;
2495 case AArch64::LDRBui:
2496 Scale = 1;
2497 UnscaledOp = AArch64::LDURBi;
2498 break;
2499 case AArch64::LDRHui:
2500 Scale = 2;
2501 UnscaledOp = AArch64::LDURHi;
2502 break;
2503 case AArch64::LDRSui:
2504 Scale = 4;
2505 UnscaledOp = AArch64::LDURSi;
2506 break;
2507 case AArch64::LDRDui:
2508 Scale = 8;
2509 UnscaledOp = AArch64::LDURDi;
2510 break;
2511 case AArch64::LDRQui:
2512 Scale = 16;
2513 UnscaledOp = AArch64::LDURQi;
2514 break;
2515 case AArch64::LDRBBui:
2516 Scale = 1;
2517 UnscaledOp = AArch64::LDURBBi;
2518 break;
2519 case AArch64::LDRHHui:
2520 Scale = 2;
2521 UnscaledOp = AArch64::LDURHHi;
2522 break;
2523 case AArch64::LDRSBXui:
2524 Scale = 1;
2525 UnscaledOp = AArch64::LDURSBXi;
2526 break;
2527 case AArch64::LDRSBWui:
2528 Scale = 1;
2529 UnscaledOp = AArch64::LDURSBWi;
2530 break;
2531 case AArch64::LDRSHXui:
2532 Scale = 2;
2533 UnscaledOp = AArch64::LDURSHXi;
2534 break;
2535 case AArch64::LDRSHWui:
2536 Scale = 2;
2537 UnscaledOp = AArch64::LDURSHWi;
2538 break;
2539 case AArch64::LDRSWui:
2540 Scale = 4;
2541 UnscaledOp = AArch64::LDURSWi;
2542 break;
2543
2544 case AArch64::STRXui:
2545 Scale = 8;
2546 UnscaledOp = AArch64::STURXi;
2547 break;
2548 case AArch64::STRWui:
2549 Scale = 4;
2550 UnscaledOp = AArch64::STURWi;
2551 break;
2552 case AArch64::STRBui:
2553 Scale = 1;
2554 UnscaledOp = AArch64::STURBi;
2555 break;
2556 case AArch64::STRHui:
2557 Scale = 2;
2558 UnscaledOp = AArch64::STURHi;
2559 break;
2560 case AArch64::STRSui:
2561 Scale = 4;
2562 UnscaledOp = AArch64::STURSi;
2563 break;
2564 case AArch64::STRDui:
2565 Scale = 8;
2566 UnscaledOp = AArch64::STURDi;
2567 break;
2568 case AArch64::STRQui:
2569 Scale = 16;
2570 UnscaledOp = AArch64::STURQi;
2571 break;
2572 case AArch64::STRBBui:
2573 Scale = 1;
2574 UnscaledOp = AArch64::STURBBi;
2575 break;
2576 case AArch64::STRHHui:
2577 Scale = 2;
2578 UnscaledOp = AArch64::STURHHi;
2579 break;
2580
2581 case AArch64::LDPXi:
2582 case AArch64::LDPDi:
2583 case AArch64::STPXi:
2584 case AArch64::STPDi:
Ahmed Bougacha05541452015-09-10 01:54:43 +00002585 case AArch64::LDNPXi:
2586 case AArch64::LDNPDi:
2587 case AArch64::STNPXi:
2588 case AArch64::STNPDi:
2589 ImmIdx = 3;
Tim Northover3b0846e2014-05-24 12:50:23 +00002590 IsSigned = true;
2591 Scale = 8;
2592 break;
2593 case AArch64::LDPQi:
2594 case AArch64::STPQi:
Ahmed Bougacha05541452015-09-10 01:54:43 +00002595 case AArch64::LDNPQi:
2596 case AArch64::STNPQi:
2597 ImmIdx = 3;
Tim Northover3b0846e2014-05-24 12:50:23 +00002598 IsSigned = true;
2599 Scale = 16;
2600 break;
2601 case AArch64::LDPWi:
2602 case AArch64::LDPSi:
2603 case AArch64::STPWi:
2604 case AArch64::STPSi:
Ahmed Bougacha05541452015-09-10 01:54:43 +00002605 case AArch64::LDNPWi:
2606 case AArch64::LDNPSi:
2607 case AArch64::STNPWi:
2608 case AArch64::STNPSi:
2609 ImmIdx = 3;
Tim Northover3b0846e2014-05-24 12:50:23 +00002610 IsSigned = true;
2611 Scale = 4;
2612 break;
2613
2614 case AArch64::LDURXi:
2615 case AArch64::LDURWi:
2616 case AArch64::LDURBi:
2617 case AArch64::LDURHi:
2618 case AArch64::LDURSi:
2619 case AArch64::LDURDi:
2620 case AArch64::LDURQi:
2621 case AArch64::LDURHHi:
2622 case AArch64::LDURBBi:
2623 case AArch64::LDURSBXi:
2624 case AArch64::LDURSBWi:
2625 case AArch64::LDURSHXi:
2626 case AArch64::LDURSHWi:
2627 case AArch64::LDURSWi:
2628 case AArch64::STURXi:
2629 case AArch64::STURWi:
2630 case AArch64::STURBi:
2631 case AArch64::STURHi:
2632 case AArch64::STURSi:
2633 case AArch64::STURDi:
2634 case AArch64::STURQi:
2635 case AArch64::STURBBi:
2636 case AArch64::STURHHi:
2637 Scale = 1;
2638 break;
2639 }
2640
2641 Offset += MI.getOperand(ImmIdx).getImm() * Scale;
2642
2643 bool useUnscaledOp = false;
2644 // If the offset doesn't match the scale, we rewrite the instruction to
2645 // use the unscaled instruction instead. Likewise, if we have a negative
2646 // offset (and have an unscaled op to use).
2647 if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
2648 useUnscaledOp = true;
2649
2650 // Use an unscaled addressing mode if the instruction has a negative offset
2651 // (or if the instruction is already using an unscaled addressing mode).
2652 unsigned MaskBits;
2653 if (IsSigned) {
2654 // ldp/stp instructions.
2655 MaskBits = 7;
2656 Offset /= Scale;
2657 } else if (UnscaledOp == 0 || useUnscaledOp) {
2658 MaskBits = 9;
2659 IsSigned = true;
2660 Scale = 1;
2661 } else {
2662 MaskBits = 12;
2663 IsSigned = false;
2664 Offset /= Scale;
2665 }
2666
2667 // Attempt to fold address computation.
2668 int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
2669 int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
2670 if (Offset >= MinOff && Offset <= MaxOff) {
2671 if (EmittableOffset)
2672 *EmittableOffset = Offset;
2673 Offset = 0;
2674 } else {
2675 int NewOff = Offset < 0 ? MinOff : MaxOff;
2676 if (EmittableOffset)
2677 *EmittableOffset = NewOff;
2678 Offset = (Offset - NewOff) * Scale;
2679 }
2680 if (OutUseUnscaledOp)
2681 *OutUseUnscaledOp = useUnscaledOp;
2682 if (OutUnscaledOp)
2683 *OutUnscaledOp = UnscaledOp;
2684 return AArch64FrameOffsetCanUpdate |
2685 (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
2686}
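
// Worked example (illustrative): an LDRXui (Scale = 8) whose accumulated
// offset is 4 fails the multiple-of-scale check, so the rewrite switches to
// the unscaled LDURXi, whose signed 9-bit range [-256, 255] encodes 4
// directly and the offset is reported as fully legal.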
2687
2688bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2689 unsigned FrameReg, int &Offset,
2690 const AArch64InstrInfo *TII) {
2691 unsigned Opcode = MI.getOpcode();
2692 unsigned ImmIdx = FrameRegIdx + 1;
2693
2694 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
2695 Offset += MI.getOperand(ImmIdx).getImm();
2696 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
2697 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
2698 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
2699 MI.eraseFromParent();
2700 Offset = 0;
2701 return true;
2702 }
2703
2704 int NewOffset;
2705 unsigned UnscaledOp;
2706 bool UseUnscaledOp;
2707 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
2708 &UnscaledOp, &NewOffset);
2709 if (Status & AArch64FrameOffsetCanUpdate) {
2710 if (Status & AArch64FrameOffsetIsLegal)
2711 // Replace the FrameIndex with FrameReg.
2712 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2713 if (UseUnscaledOp)
2714 MI.setDesc(TII->get(UnscaledOp));
2715
2716 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
2717 return Offset == 0;
2718 }
2719
2720 return false;
2721}
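
// e.g. (hypothetical operands): "%x0 = ADDXri <fi#0>, 12" with FrameReg = fp
// and an incoming frame offset of 32 folds into emitFrameOffset(..., x0, fp,
// 44, ...), after which the original ADDXri is erased.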
2722
2723void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
2724 NopInst.setOpcode(AArch64::HINT);
Jim Grosbache9119e42015-05-13 18:37:00 +00002725 NopInst.addOperand(MCOperand::createImm(0));
Tim Northover3b0846e2014-05-24 12:50:23 +00002726}
Chad Rosier9d1a5562016-05-02 14:56:21 +00002727
2728// AArch64 supports MachineCombiner.
Benjamin Kramer8c90fd72014-09-03 11:41:21 +00002729bool AArch64InstrInfo::useMachineCombiner() const {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002731 return true;
2732}
2733//
2734// True when Opc sets flag
2735static bool isCombineInstrSettingFlag(unsigned Opc) {
2736 switch (Opc) {
2737 case AArch64::ADDSWrr:
2738 case AArch64::ADDSWri:
2739 case AArch64::ADDSXrr:
2740 case AArch64::ADDSXri:
2741 case AArch64::SUBSWrr:
2742 case AArch64::SUBSXrr:
2743 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2744 case AArch64::SUBSWri:
2745 case AArch64::SUBSXri:
2746 return true;
2747 default:
2748 break;
2749 }
2750 return false;
2751}
2752//
2753// 32b Opcodes that can be combined with a MUL
2754static bool isCombineInstrCandidate32(unsigned Opc) {
2755 switch (Opc) {
2756 case AArch64::ADDWrr:
2757 case AArch64::ADDWri:
2758 case AArch64::SUBWrr:
2759 case AArch64::ADDSWrr:
2760 case AArch64::ADDSWri:
2761 case AArch64::SUBSWrr:
2762 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2763 case AArch64::SUBWri:
2764 case AArch64::SUBSWri:
2765 return true;
2766 default:
2767 break;
2768 }
2769 return false;
2770}
2771//
2772// 64b Opcodes that can be combined with a MUL
2773static bool isCombineInstrCandidate64(unsigned Opc) {
2774 switch (Opc) {
2775 case AArch64::ADDXrr:
2776 case AArch64::ADDXri:
2777 case AArch64::SUBXrr:
2778 case AArch64::ADDSXrr:
2779 case AArch64::ADDSXri:
2780 case AArch64::SUBSXrr:
2781 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2782 case AArch64::SUBXri:
2783 case AArch64::SUBSXri:
2784 return true;
2785 default:
2786 break;
2787 }
2788 return false;
2789}
2790//
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00002791// FP Opcodes that can be combined with a FMUL
2792static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
2793 switch (Inst.getOpcode()) {
2794 case AArch64::FADDSrr:
2795 case AArch64::FADDDrr:
2796 case AArch64::FADDv2f32:
2797 case AArch64::FADDv2f64:
2798 case AArch64::FADDv4f32:
2799 case AArch64::FSUBSrr:
2800 case AArch64::FSUBDrr:
2801 case AArch64::FSUBv2f32:
2802 case AArch64::FSUBv2f64:
2803 case AArch64::FSUBv4f32:
2804 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
2805 default:
2806 break;
2807 }
2808 return false;
2809}
2810//
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002811// Opcodes that can be combined with a MUL
2812static bool isCombineInstrCandidate(unsigned Opc) {
2813 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
2814}
2815
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00002816//
2817// Utility routine that checks if \param MO is defined by an
2818// \param CombineOpc instruction in the basic block \param MBB
2819static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
2820 unsigned CombineOpc, unsigned ZeroReg = 0,
2821 bool CheckZeroReg = false) {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002822 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2823 MachineInstr *MI = nullptr;
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00002824
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002825 if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
2826 MI = MRI.getUniqueVRegDef(MO.getReg());
2827 // And it needs to be in the trace (otherwise, it won't have a depth).
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00002828 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002829 return false;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002830  // Must only be used by the user we combine with.
Gerolf Hoflehnerfe2c11f2014-08-13 22:07:36 +00002831 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002832 return false;
2833
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00002834 if (CheckZeroReg) {
2835 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
2836 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
 2837         MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
2838 // The third input reg must be zero.
2839 if (MI->getOperand(3).getReg() != ZeroReg)
2840 return false;
2841 }
2842
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002843 return true;
2844}
2845
Gerolf Hoflehner01b3a6182016-04-24 05:14:01 +00002846//
 2847// Is \param MO defined by an integer multiply, and can it be combined?
2848static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
2849 unsigned MulOpc, unsigned ZeroReg) {
2850 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
2851}
2852
2853//
 2854// Is \param MO defined by a floating-point multiply, and can it be combined?
2855static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
2856 unsigned MulOpc) {
2857 return canCombine(MBB, MO, MulOpc);
2858}
2859
Haicheng Wu08b94622016-01-07 04:01:02 +00002860// TODO: There are many more machine instruction opcodes to match:
2861// 1. Other data types (integer, vectors)
2862// 2. Other math / logic operations (xor, or)
2863// 3. Other forms of the same operation (intrinsics and other variants)
 2864bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
2865 switch (Inst.getOpcode()) {
2866 case AArch64::FADDDrr:
2867 case AArch64::FADDSrr:
2868 case AArch64::FADDv2f32:
2869 case AArch64::FADDv2f64:
2870 case AArch64::FADDv4f32:
2871 case AArch64::FMULDrr:
2872 case AArch64::FMULSrr:
2873 case AArch64::FMULX32:
2874 case AArch64::FMULX64:
2875 case AArch64::FMULXv2f32:
2876 case AArch64::FMULXv2f64:
2877 case AArch64::FMULXv4f32:
2878 case AArch64::FMULv2f32:
2879 case AArch64::FMULv2f64:
2880 case AArch64::FMULv4f32:
2881 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
2882 default:
2883 return false;
2884 }
2885}
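
// e.g., with unsafe FP math a chain of FADDDrr instructions computing
// ((a + b) + c) + d may be reassociated into (a + b) + (c + d), halving the
// critical-path depth of the additions.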
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002886
Haicheng Wu08b94622016-01-07 04:01:02 +00002887/// Find instructions that can be turned into madd.
2888static bool getMaddPatterns(MachineInstr &Root,
2889 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002890 unsigned Opc = Root.getOpcode();
2891 MachineBasicBlock &MBB = *Root.getParent();
2892 bool Found = false;
2893
2894 if (!isCombineInstrCandidate(Opc))
Chad Rosier85c85942016-03-23 20:07:28 +00002895 return false;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002896 if (isCombineInstrSettingFlag(Opc)) {
2897 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
 2898    // Bail out when NZCV is live.
2899 if (Cmp_NZCV == -1)
Chad Rosier85c85942016-03-23 20:07:28 +00002900 return false;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002901 unsigned NewOpc = convertFlagSettingOpcode(&Root);
 2902    // Bail out when the opcode cannot be converted.
2903 // CHECKME: do we miss any cases for opcode conversion?
2904 if (NewOpc == Opc)
Chad Rosier85c85942016-03-23 20:07:28 +00002905 return false;
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +00002906 Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}
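
// Editor's sketch (assumption, not part of the upstream file): the MIR shape
// getMaddPatterns() is matching. AArch64 has no standalone MUL at the MI
// level; a multiply is a MADD whose accumulator is the zero register, which
// is why canCombineWithMUL() is queried with MADD?rrr plus WZR/XZR. Virtual
// register numbers below are hypothetical.
//
//   %5 = MADDWrrr %1, %2, %wzr    ; %5 = %1 * %2  (plain multiply)
//   %6 = ADDWrr %5, %3            ; Root, matched as MULADDW_OP1
//
// genAlternativeCodeSequence() later offers the single-instruction form
//
//   %6 = MADDWrrr %1, %2, %3      ; %6 = %1 * %2 + %3
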
/// Floating-Point Support

/// Find floating-point instructions that can be combined into a fused
/// multiply-add (fmadd/fmla).
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {

  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
      Found = true;
    }
    break;

  case AArch64::FSUBSrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
      Found = true;
    }
    break;
  }
  return Found;
}
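
// Editor's sketch (assumption, not part of the upstream file): one vector
// shape getFMAPatterns() recognizes, with hypothetical virtual registers.
//
//   %4 = FMULv2f32 %1, %2
//   %5 = FADDv2f32 %4, %3         ; Root, matched as FMLAv2f32_OP1
//
// which the combiner can rewrite into the accumulating form
//
//   %5 = FMLAv2f32 %3, %1, %2     ; %5 = %3 + %1 * %2, addend first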

/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool
AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}
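
// Editor's note (assumption, not from the upstream file): the MachineCombiner
// normally accepts a pattern only if the new sequence shortens the critical
// path. The patterns whitelisted above are profitable in loops even without
// that proof: replacing an FMUL/FADD pair with one FMLA retires fewer
// instructions per iteration, improving throughput although the fused op may
// have a longer latency than the FADD it replaces.
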
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Patterns vector. Patterns should be sorted in priority order since
/// the pattern evaluator stops checking as soon as it finds a faster sequence.

bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}
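
// Editor's sketch (assumption, not from the upstream file): roughly how the
// generic MachineCombiner pass drives this hook; the loop below is pseudocode.
//
//   SmallVector<MachineCombinerPattern, 16> Patterns;
//   if (TII->getMachineCombinerPatterns(MI, Patterns))
//     for (MachineCombinerPattern P : Patterns)
//       // build the alternative via genAlternativeCodeSequence() and keep
//       // it only if the cost model (or isThroughputPattern) approves
//
// Because evaluation stops at the first winning pattern, the push order above
// doubles as priority order.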

enum class FMAInstKind { Default, Indexed, Accumulator };
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B,0
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC the register class of the operands
/// \param kind selects the shape of the generated instruction: plain
/// three-source madd, lane-indexed fma, or accumulator-first fma
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();
  unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
  bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind \n");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
  InsInstrs.push_back(MIB);
  return MUL;
}
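
// Editor's note (assumption, not from the upstream file): for the Indexed and
// Accumulator kinds the addend is emitted first, matching the destructive
// FMLA/FMLS operand order. With hypothetical registers:
//
//   %4 = FMULv2i32_indexed %1, %2, 1
//   %5 = FADDv2f32 %4, %3
// becomes
//   %5 = FMLAv2i32_indexed %3, %1, %2, 1   ; addend %3 first, lane index kept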

/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc,
                              unsigned VR, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
                                    ResultReg)
                                .addReg(SrcReg0, getKillRegState(Src0IsKill))
                                .addReg(SrcReg1, getKillRegState(Src1IsKill))
                                .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}
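
// Editor's sketch (assumption, not from the upstream file): a worked genMaddR
// use with hypothetical registers, combining "a * b + imm" when the immediate
// is encodable as an ORR logical immediate:
//
//   %4 = MADDWrrr %1, %2, %wzr    ; %4 = %1 * %2
//   %5 = ADDWri %4, imm, 0        ; Root
// becomes
//   %6 = ORRWri %wzr, <encoding of imm>   ; materialize imm into a register
//   %5 = MADDWrrr %1, %2, %6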

/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence.
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .addOperand(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I, Imm
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    int Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  // Floating Point Support
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDD_OP1:
    // FMUL I=A,B,0
    // FADD R,I,C
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULADDD_OP2:
    // FMUL I=A,B,0
    // FADD R,C,I
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBD_OP1: {
    // FMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMSUB R,A,B,C // = -C + A*B
    // --- Create(FNMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
      Opc = AArch64::FNMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULSUBD_OP2: {
    // FMUL I=A,B,0
    // FSUB R,C,I
    // ==> FMSUB R,A,B,C (computes C - A*B)
    // --- Create(FMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
      Opc = AArch64::FMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion
  DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);

  return;
}
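
// Editor's sketch (assumption, not from the upstream file): an end-to-end
// MULSUBWI_OP1 rewrite with hypothetical registers.
//
//   %4 = MADDWrrr %1, %2, %wzr    ; I = A * B
//   %5 = SUBWri %4, imm, 0        ; Root: R = I - imm
//
// becomes, when -imm is a valid logical immediate:
//
//   %6 = ORRWri %wzr, <encoding of -imm>
//   %5 = MADDWrrr %1, %2, %6      ; R = A * B + (-imm)
//
// InsInstrs then holds the ORR and the new MADD, DelInstrs the old MUL and
// SUB; the MachineCombiner pass decides whether to commit the swap.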

/// \brief Replace csinc-branch sequence by simple conditional branch
///
/// Examples:
/// 1.
///   csinc w9, wzr, wzr, <condition code>
///   tbnz  w9, #0, 0x44
///    to
///   b.<inverted condition code>
///
/// 2.
///   csinc w9, wzr, wzr, <condition code>
///   tbz   w9, #0, 0x44
///    to
///   b.<condition code>
///
/// Replace compare and branch sequence by a TBZ/TBNZ instruction when the
/// compare's constant operand is a power of 2.
///
/// Examples:
///   and  w8, w8, #0x400
///   cbnz w8, L1
///    to
///   tbnz w8, #10, L1
///
/// \param  MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI->getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI->getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI->getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find definition.
  while (DefMI->isCopy()) {
    unsigned CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

    MachineOperand &MO = DefMI->getOperand(1);
    unsigned NewReg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI->getOperand(1).getMBB();
    DebugLoc DL = MI->getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);

    // For bit positions smaller than 32, we have to use the 32-bit
    // variant (W) in all cases; the 64-bit variant cannot encode them.
    // Therefore, if the input register is 64-bit, we need to take its
    // 32-bit sub-register.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI->eraseFromParent();
    return true;
  }
  // Look for CSINC
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI->getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI->eraseFromParent();
    return true;
  }
  }
}

std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}
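
// Editor's sketch (assumption, not from the upstream file): a GOT page
// operand carries MO_GOT | MO_PAGE. Decomposing it with the MO_FRAGMENT mask
// splits the direct part (MO_PAGE) from the bitmask part (MO_GOT), so the two
// flag groups can be serialized independently by the tables below.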

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"},
      {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},
      {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},
      {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT, "aarch64-got"},
      {MO_NC, "aarch64-nc"},
      {MO_TLS, "aarch64-tls"},
      {MO_CONSTPOOL, "aarch64-constant-pool"}};
  return makeArrayRef(TargetFlags);
}
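
// Editor's note (assumption, not from the upstream file): the string names in
// the two tables above are what the MIR serializer prints, so an operand
// created with MO_GOT | MO_PAGE should round-trip through a .mir file as
// something like
//
//   %0 = ADRP target-flags(aarch64-page, aarch64-got) @var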